Index: components/url_formatter/url_formatter_unittest.cc |
diff --git a/components/url_formatter/url_formatter_unittest.cc b/components/url_formatter/url_formatter_unittest.cc |
index 0dd635a9488c9dc89b43e574e11ccd548d1b30e6..81ca50c3c8f8cd1ad98506f4e0768e4e2be85434 100644 |
--- a/components/url_formatter/url_formatter_unittest.cc |
+++ b/components/url_formatter/url_formatter_unittest.cc |
@@ -25,327 +25,120 @@ using base::ASCIIToUTF16; |
const size_t kNpos = base::string16::npos; |
-const char* const kLanguages[] = { |
- "", "en", "zh-CN", "ja", "ko", |
- "he", "ar", "ru", "el", "fr", |
- "de", "pt", "sv", "th", "hi", |
- "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en", |
- "zh,ru,en" |
-}; |
- |
struct IDNTestCase { |
const char* const input; |
const wchar_t* unicode_output; |
- const bool unicode_allowed[arraysize(kLanguages)]; |
+ const bool unicode_allowed; |
}; |
// TODO(jungshik) This is just a random sample of languages and is far |
// from exhaustive. We may have to generate all the combinations |
// of languages (powerset of a set of all the languages). |
const IDNTestCase idn_cases[] = { |
- // No IDN |
- {"www.google.com", L"www.google.com", |
- {true, true, true, true, true, |
- true, true, true, true, true, |
- true, true, true, true, true, |
- true, true, true, true, true, |
- true}}, |
- {"www.google.com.", L"www.google.com.", |
- {true, true, true, true, true, |
- true, true, true, true, true, |
- true, true, true, true, true, |
- true, true, true, true, true, |
- true}}, |
- {".", L".", |
- {true, true, true, true, true, |
- true, true, true, true, true, |
- true, true, true, true, true, |
- true, true, true, true, true, |
- true}}, |
- {"", L"", |
- {true, true, true, true, true, |
- true, true, true, true, true, |
- true, true, true, true, true, |
- true, true, true, true, true, |
- true}}, |
- // IDN |
- // Hanzi (Traditional Chinese) |
- {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", |
- {true, false, true, true, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, true, true, false, |
- true}}, |
- // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh) |
- {"xn--cy2a840a.com", L"\x89c6\x9891.com", |
- {true, false, true, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- true}}, |
- // Hanzi + '123' |
- {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", |
- {true, false, true, true, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, true, true, false, |
- true}}, |
- // Hanzi + Latin : U+56FD is simplified and is regarded |
- // as not supported in zh-TW. |
- {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", |
- {false, false, true, true, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, true, false, |
- true}}, |
- // Kanji + Kana (Japanese) |
- {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", |
- {true, false, false, true, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, true, false, |
- false}}, |
- // Katakana including U+30FC |
- {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", |
- {true, false, false, true, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, true, false, |
- }}, |
- {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", |
- {true, false, false, true, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, true, false, |
- }}, |
- // Katakana + Latin (Japanese) |
- // TODO(jungshik): Change 'false' in the first element to 'true' |
- // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead |
- // of our IsIDNComponentInSingleScript(). |
- {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", |
- {false, false, false, true, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, true, false, |
- }}, |
- {"xn--3bkxe.jp", L"\x30c8\x309a.jp", |
- {false, false, false, true, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, true, false, |
- }}, |
- // Hangul (Korean) |
- {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", |
- {true, false, false, false, true, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, true, false, |
- false}}, |
- // b<u-umlaut>cher (German) |
- {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", |
- {true, false, false, false, false, |
- false, false, false, false, true, |
- true, false, false, false, false, |
- true, false, false, false, false, |
- false}}, |
- // a with diaeresis |
- {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", |
- {true, false, false, false, false, |
- false, false, false, false, false, |
- true, false, true, false, false, |
- true, false, false, false, false, |
- false}}, |
- // c-cedilla (French) |
- {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", |
- {true, false, false, false, false, |
- false, false, false, false, true, |
- false, true, false, false, false, |
- false, false, false, false, false, |
- false}}, |
- // caf'e with acute accent' (French) |
- {"xn--caf-dma.fr", L"caf\x00e9.fr", |
- {true, false, false, false, false, |
- false, false, false, false, true, |
- false, true, true, false, false, |
- false, false, false, false, false, |
- false}}, |
- // c-cedillla and a with tilde (Portuguese) |
- {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", |
- {true, false, false, false, false, |
- false, false, false, false, false, |
- false, true, false, false, false, |
- false, false, false, false, false, |
- false}}, |
- // s with caron |
- {"xn--achy-f6a.com", L"\x0161" L"achy.com", |
- {true, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false}}, |
- // TODO(jungshik) : Add examples with Cyrillic letters |
- // only used in some languages written in Cyrillic. |
- // Eutopia (Greek) |
- {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", |
- {true, false, false, false, false, |
- false, false, false, true, false, |
- false, false, false, false, false, |
- false, true, false, false, false, |
- false}}, |
- // Eutopia + 123 (Greek) |
- {"xn---123-pldm0haj2bk.gr", |
- L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", |
- {true, false, false, false, false, |
- false, false, false, true, false, |
- false, false, false, false, false, |
- false, true, false, false, false, |
- false}}, |
- // Cyrillic (Russian) |
- {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", |
- {true, false, false, false, false, |
- false, false, true, false, false, |
- false, false, false, false, false, |
- false, false, false, false, true, |
- true}}, |
- // Cyrillic + 123 (Russian) |
- {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", |
- {true, false, false, false, false, |
- false, false, true, false, false, |
- false, false, false, false, false, |
- false, false, false, false, true, |
- true}}, |
- // Arabic |
- {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", |
- {true, false, false, false, false, |
- false, true, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false}}, |
- // Hebrew |
- {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", |
- {true, false, false, false, false, |
- true, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, true, |
- false}}, |
- // Thai |
- {"xn--12c2cc4ag3b4ccu.th", |
- L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", |
- {true, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, true, false, |
- false, false, false, false, false, |
- false}}, |
- // Devangari (Hindi) |
- {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", |
- {true, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, true, |
- false, false, false, false, false, |
- false}}, |
- // Invalid IDN |
- {"xn--hello?world.com", NULL, |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false}}, |
- // Unsafe IDNs |
- // "payp<alpha>l.com" |
- {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false}}, |
- // google.gr with Greek omicron and epsilon |
- {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false}}, |
- // google.ru with Cyrillic o |
- {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false}}, |
- // h<e with acute>llo<China in Han>.cn |
- {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false}}, |
- // <Greek rho><Cyrillic a><Cyrillic u>.ru |
- {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false}}, |
- // One that's really long that will force a buffer realloc |
- {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
- "aaaaaaa", |
- L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
- L"aaaaaaaa", |
- {true, true, true, true, true, |
- true, true, true, true, true, |
- true, true, true, true, true, |
- true, true, true, true, true, |
- true}}, |
- // Test cases for characters we blacklisted although allowed in IDN. |
- // Embedded spaces will be turned to %20 in the display. |
- // TODO(jungshik): We need to have more cases. This is a typical |
- // data-driven trap. The following test cases need to be separated |
- // and tested only for a couple of languages. |
- {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false}}, |
- {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- }}, |
- {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- }}, |
- {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- }}, |
- {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- }}, |
- // Padlock icon spoof. |
- {"xn--google-hj64e", L"\U0001f512google.com", |
- {false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- }}, |
- // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist |
- // all strings with the surrogate '\xdd12'. |
- {"xn--fk9c.com", L"\U00010912.com", |
- {true, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- false, false, false, false, false, |
- }}, |
+ // No IDN |
+ {"www.google.com", L"www.google.com", true}, |
+ {"www.google.com.", L"www.google.com.", true}, |
+ {".", L".", true}, |
+ {"", L"", true}, |
+ // IDN |
+ // Hanzi (Traditional Chinese) |
+ {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true}, |
+ // Hanzi ('video' in Simplified Chinese) |
+ {"xn--cy2a840a.com", L"\x89c6\x9891.com", true}, |
+ // Hanzi + '123' |
+ {"www.xn--123-p18d.com", |
+ L"www.\x4e00" |
+ L"123.com", |
+ true}, |
+ // Hanzi + Latin : U+56FD is simplified |
+ {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", true}, |
+ // Kanji + Kana (Japanese) |
+ {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", true}, |
+ // Katakana including U+30FC |
+ {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", true}, |
+ {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", true}, |
+ // Katakana + Latin (Japanese) |
+ {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", true}, |
+ {"xn--3bkxe.jp", L"\x30c8\x309a.jp", true}, |
+ // Hangul (Korean) |
+ {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", true}, |
+ // b<u-umlaut>cher (German) |
+ {"xn--bcher-kva.de", |
+ L"b\x00fc" |
+ L"cher.de", |
+ true}, |
+ // a with diaeresis |
+ {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", true}, |
+ // c-cedilla (French) |
+ {"www.xn--alliancefranaise-npb.fr", |
+ L"www.alliancefran\x00e7" |
+ L"aise.fr", |
+ true}, |
+ // caf<e-acute> (French) |
+ {"xn--caf-dma.fr", L"caf\x00e9.fr", true}, |
+ // c-cedilla and a with tilde (Portuguese) |
+ {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", true}, |
+ // s with caron |
+ {"xn--achy-f6a.com", |
+ L"\x0161" |
+ L"achy.com", |
+ true}, |
+ {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", |
+ true}, |
+ // Eutopia + 123 (Greek) |
+ {"xn---123-pldm0haj2bk.gr", |
+ L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", true}, |
+ // Cyrillic (Russian) |
+ {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", true}, |
+ // Cyrillic + 123 (Russian) |
+ {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", true}, |
+ // Arabic |
+ {"xn--mgba1fmg.eg", L"\x0627\x0641\x0644\x0627\x0645.eg", true}, |
+ // Hebrew |
+ {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", true}, |
+ // Thai |
+ {"xn--12c2cc4ag3b4ccu.th", |
+ L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true}, |
+ // Devanagari (Hindi) |
+ {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true}, |
+ // Invalid IDN |
+ {"xn--hello?world.com", NULL, false}, |
+ // Unsafe IDNs |
+ // "payp<alpha>l.com" |
+ {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false}, |
+ // google.gr with Greek omicron and epsilon |
+ {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", false}, |
+ // google.ru with Cyrillic o |
+ {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", false}, |
+ // h<e with acute>llo<China in Han>.cn |
+ {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", false}, |
+ // <Greek rho><Cyrillic a><Cyrillic u>.ru |
+ {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", false}, |
+ // One that's really long that will force a buffer realloc |
+ {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
+ "aaaaaaa", |
+ L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
+ L"aaaaaaaa", |
+ true}, |
+ {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false}, |
+ {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false}, |
+ // Test cases for characters we blacklisted although allowed in IDN. |
+ {"google.xn--comabc-k8d", |
+ L"google.com\x0338" |
+ L"abc", |
+ false}, |
+ {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", false}, |
+ {"google.xn--comevil-v04f.jp", |
+ L"google.com\x30ce" |
+ L"evil.jp", |
+ false}, |
+ // Padlock icon spoof. |
+ {"xn--google-hj64e", L"\U0001f512google.com", false}, |
+ // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist |
+ // all strings with the surrogate '\xdd12'. |
+ {"xn--fk9c.com", L"\U00010912.com", false}, |
+ {"xn--g6h.com", L"\x2665.com", true}, |
+ {"xn--2ci.com", L"\x272a.com", true}, |
#if 0 |
// These two cases are special. We need a separate test. |
// U+3000 and U+3002 are normalized to ASCII space and dot. |
@@ -372,24 +165,12 @@ struct AdjustOffsetCase { |
struct UrlTestData { |
const char* const description; |
const char* const input; |
- const char* const languages; |
FormatUrlTypes format_types; |
net::UnescapeRule::Type escape_rules; |
const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. |
size_t prefix_len; |
}; |
-// A helper for IDN*{Fast,Slow}. |
-// Append "::<language list>" to |expected| and |actual| to make it |
-// easy to tell which sub-case fails without debugging. |
-void AppendLanguagesToOutputs(const char* languages, |
- base::string16* expected, |
- base::string16* actual) { |
- base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages); |
- expected->append(to_append); |
- actual->append(to_append); |
-} |
- |
// A pair of helpers for the FormatUrlWithOffsets() test. |
void VerboseExpect(size_t expected, |
size_t actual, |
@@ -401,7 +182,6 @@ void VerboseExpect(size_t expected, |
} |
void CheckAdjustedOffsets(const std::string& url_string, |
- const std::string& languages, |
FormatUrlTypes format_types, |
net::UnescapeRule::Type unescape_rules, |
const size_t* output_offsets) { |
@@ -412,7 +192,7 @@ void CheckAdjustedOffsets(const std::string& url_string, |
offsets.push_back(i); |
offsets.push_back(500000); // Something larger than any input length. |
offsets.push_back(std::string::npos); |
- base::string16 formatted_url = FormatUrlWithOffsets(url, languages, |
+ base::string16 formatted_url = FormatUrlWithOffsets(url, std::string(), |
format_types, unescape_rules, NULL, NULL, &offsets); |
for (size_t i = 0; i < url_length; ++i) |
VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); |
@@ -424,113 +204,67 @@ void CheckAdjustedOffsets(const std::string& url_string, |
std::string::npos, formatted_url); |
} |
-TEST(UrlFormatterTest, IDNToUnicodeFast) { |
+TEST(UrlFormatterTest, IDNToUnicode) { |
for (size_t i = 0; i < arraysize(idn_cases); i++) { |
- for (size_t j = 0; j < arraysize(kLanguages); j++) { |
- // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow |
- if (j == 3 || j == 17 || j == 18) |
- continue; |
- base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); |
- base::string16 expected(idn_cases[i].unicode_allowed[j] ? |
- WideToUTF16(idn_cases[i].unicode_output) : |
- ASCIIToUTF16(idn_cases[i].input)); |
- AppendLanguagesToOutputs(kLanguages[j], &expected, &output); |
- EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input |
- << "\", languages: \"" << kLanguages[j] |
- << "\""; |
- } |
- } |
-} |
- |
-TEST(UrlFormatterTest, IDNToUnicodeSlow) { |
- for (size_t i = 0; i < arraysize(idn_cases); i++) { |
- for (size_t j = 0; j < arraysize(kLanguages); j++) { |
- // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast |
- if (!(j == 3 || j == 17 || j == 18)) |
- continue; |
- base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); |
- base::string16 expected(idn_cases[i].unicode_allowed[j] ? |
- WideToUTF16(idn_cases[i].unicode_output) : |
- ASCIIToUTF16(idn_cases[i].input)); |
- AppendLanguagesToOutputs(kLanguages[j], &expected, &output); |
- EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input |
- << "\", languages: \"" << kLanguages[j] |
- << "\""; |
- } |
- } |
-} |
- |
-// ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and |
-// te), which was causing a crash (See http://crbug.com/510551). This may be an |
-// icu bug, but regardless, that should not cause a crash. |
-TEST(UrlFormatterTest, IDNToUnicodeNeverCrashes) { |
- for (char c1 = 'a'; c1 <= 'z'; c1++) { |
- for (char c2 = 'a'; c2 <= 'z'; c2++) { |
- std::string lang = base::StringPrintf("%c%c", c1, c2); |
- base::string16 output(IDNToUnicode("xn--74h", lang)); |
- } |
+ base::string16 output(IDNToUnicode(idn_cases[i].input, std::string())); |
+ base::string16 expected(idn_cases[i].unicode_allowed |
+ ? WideToUTF16(idn_cases[i].unicode_output) |
+ : ASCIIToUTF16(idn_cases[i].input)); |
+ EXPECT_EQ(expected, output) << "input # " << i << ": \"" |
+ << idn_cases[i].input << "\""; |
} |
} |
TEST(UrlFormatterTest, FormatUrl) { |
FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; |
const UrlTestData tests[] = { |
- {"Empty URL", "", "", default_format_type, net::UnescapeRule::NORMAL, L"", |
+ {"Empty URL", "", default_format_type, net::UnescapeRule::NORMAL, L"", |
0}, |
- {"Simple URL", "http://www.google.com/", "", default_format_type, |
+ {"Simple URL", "http://www.google.com/", default_format_type, |
net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, |
{"With a port number and a reference", |
- "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, |
+ "http://www.google.com:8080/#\xE3\x82\xB0", default_format_type, |
net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, |
// -------- IDN tests -------- |
- {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", "ja", |
+ {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", |
default_format_type, net::UnescapeRule::NORMAL, |
L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, |
- {"Japanese IDN with en", "http://xn--l8jvb1ey91xtjb.jp", "en", |
- default_format_type, net::UnescapeRule::NORMAL, |
- L"http://xn--l8jvb1ey91xtjb.jp/", 7}, |
- |
- {"Japanese IDN without any languages", "http://xn--l8jvb1ey91xtjb.jp", "", |
- default_format_type, net::UnescapeRule::NORMAL, |
- // Single script is safe for empty languages. |
- L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, |
- |
- {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", |
+ {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", |
default_format_type, net::UnescapeRule::NORMAL, |
// GURL doesn't assume an email address's domain part as a host name. |
L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, |
{"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", |
- "ja", default_format_type, net::UnescapeRule::NORMAL, |
+ default_format_type, net::UnescapeRule::NORMAL, |
L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, |
- {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", |
+ {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", |
default_format_type, net::UnescapeRule::NORMAL, |
L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, |
// -------- omit_username_password flag tests -------- |
{"With username and password, omit_username_password=false", |
- "http://user:passwd@example.com/foo", "", kFormatUrlOmitNothing, |
+ "http://user:passwd@example.com/foo", kFormatUrlOmitNothing, |
net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, |
{"With username and password, omit_username_password=true", |
- "http://user:passwd@example.com/foo", "", default_format_type, |
+ "http://user:passwd@example.com/foo", default_format_type, |
net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, |
- {"With username and no password", "http://user@example.com/foo", "", |
+ {"With username and no password", "http://user@example.com/foo", |
default_format_type, net::UnescapeRule::NORMAL, |
L"http://example.com/foo", 7}, |
- {"Just '@' without username and password", "http://@example.com/foo", "", |
+ {"Just '@' without username and password", "http://@example.com/foo", |
default_format_type, net::UnescapeRule::NORMAL, |
L"http://example.com/foo", 7}, |
// GURL doesn't think local-part of an email address is username for URL. |
- {"mailto:, omit_username_password=true", "mailto:foo@example.com", "", |
+ {"mailto:, omit_username_password=true", "mailto:foo@example.com", |
default_format_type, net::UnescapeRule::NORMAL, |
L"mailto:foo@example.com", 7}, |
@@ -539,9 +273,9 @@ TEST(UrlFormatterTest, FormatUrl) { |
"http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" |
"%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" |
"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
- "en", default_format_type, net::UnescapeRule::NONE, |
+ default_format_type, net::UnescapeRule::NONE, |
// GURL parses %-encoded hostnames into Punycode. |
- L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" |
+ L"http://\x30B0\x30FC\x30B0\x30EB.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" |
L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
7}, |
@@ -549,91 +283,91 @@ TEST(UrlFormatterTest, FormatUrl) { |
"http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" |
"%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" |
"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
- "en", default_format_type, net::UnescapeRule::NORMAL, |
- L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" |
+ default_format_type, net::UnescapeRule::NORMAL, |
+ L"http://\x30B0\x30FC\x30B0\x30EB.jp/\x30B0\x30FC\x30B0\x30EB" |
L"?q=\x30B0\x30FC\x30B0\x30EB", |
7}, |
{"Unescape normally with BiDi control character", |
- "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", |
+ "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", |
default_format_type, net::UnescapeRule::NORMAL, |
L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, |
{"Unescape normally including unescape spaces", |
- "http://www.google.com/search?q=Hello%20World", "en", |
+ "http://www.google.com/search?q=Hello%20World", |
default_format_type, net::UnescapeRule::SPACES, |
L"http://www.google.com/search?q=Hello World", 7}, |
/* |
{"unescape=true with some special characters", |
- "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", |
+ "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", |
kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, |
*/ |
// Disabled: the resultant URL becomes "...user%253A:%2540passwd...". |
// -------- omit http: -------- |
- {"omit http with user name", "http://user@example.com/foo", "", |
+ {"omit http with user name", "http://user@example.com/foo", |
kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0}, |
- {"omit http", "http://www.google.com/", "en", kFormatUrlOmitHTTP, |
+ {"omit http", "http://www.google.com/", kFormatUrlOmitHTTP, |
net::UnescapeRule::NORMAL, L"www.google.com/", 0}, |
- {"omit http with https", "https://www.google.com/", "en", |
+ {"omit http with https", "https://www.google.com/", |
kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, |
L"https://www.google.com/", 8}, |
- {"omit http starts with ftp.", "http://ftp.google.com/", "en", |
+ {"omit http starts with ftp.", "http://ftp.google.com/", |
kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", |
7}, |
// -------- omit trailing slash on bare hostname -------- |
- {"omit slash when it's the entire path", "http://www.google.com/", "en", |
+ {"omit slash when it's the entire path", "http://www.google.com/", |
kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
L"http://www.google.com", 7}, |
- {"omit slash when there's a ref", "http://www.google.com/#ref", "en", |
+ {"omit slash when there's a ref", "http://www.google.com/#ref", |
kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
L"http://www.google.com/#ref", 7}, |
- {"omit slash when there's a query", "http://www.google.com/?", "en", |
+ {"omit slash when there's a query", "http://www.google.com/?", |
kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
L"http://www.google.com/?", 7}, |
{"omit slash when it's not the entire path", "http://www.google.com/foo", |
- "en", kFormatUrlOmitTrailingSlashOnBareHostname, |
+ kFormatUrlOmitTrailingSlashOnBareHostname, |
net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7}, |
- {"omit slash for nonstandard URLs", "data:/", "en", |
+ {"omit slash for nonstandard URLs", "data:/", |
kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
L"data:/", 5}, |
- {"omit slash for file URLs", "file:///", "en", |
+ {"omit slash for file URLs", "file:///", |
kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
L"file:///", 7}, |
// -------- view-source: -------- |
- {"view-source", "view-source:http://xn--qcka1pmc.jp/", "ja", |
+ {"view-source", "view-source:http://xn--qcka1pmc.jp/", |
default_format_type, net::UnescapeRule::NORMAL, |
L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, |
{"view-source of view-source", |
- "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", |
+ "view-source:view-source:http://xn--qcka1pmc.jp/", |
default_format_type, net::UnescapeRule::NORMAL, |
L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, |
// view-source should omit http and trailing slash where non-view-source |
// would. |
- {"view-source omit http", "view-source:http://a.b/c", "en", |
+ {"view-source omit http", "view-source:http://a.b/c", |
kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12}, |
{"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", |
- "en", kFormatUrlOmitAll, net::UnescapeRule::NORMAL, |
+ kFormatUrlOmitAll, net::UnescapeRule::NORMAL, |
L"view-source:http://ftp.b/c", 19}, |
{"view-source omit slash when it's the entire path", |
- "view-source:http://a.b/", "en", kFormatUrlOmitAll, |
+ "view-source:http://a.b/", kFormatUrlOmitAll, |
net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, |
}; |
for (size_t i = 0; i < arraysize(tests); ++i) { |
size_t prefix_len; |
base::string16 formatted = FormatUrl( |
- GURL(tests[i].input), tests[i].languages, tests[i].format_types, |
- tests[i].escape_rules, NULL, &prefix_len, NULL); |
+ GURL(tests[i].input), std::string(), tests[i].format_types, |
+ tests[i].escape_rules, NULL, &prefix_len, NULL); |
EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; |
EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; |
} |
@@ -645,8 +379,8 @@ TEST(UrlFormatterTest, FormatUrlParsed) { |
base::string16 formatted = |
FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" |
"%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), |
- "ja", kFormatUrlOmitNothing, net::UnescapeRule::NONE, &parsed, |
- NULL, NULL); |
+ std::string(), kFormatUrlOmitNothing, net::UnescapeRule::NONE, |
+ &parsed, NULL, NULL); |
EXPECT_EQ(WideToUTF16( |
L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" |
L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); |
@@ -861,14 +595,14 @@ TEST(UrlFormatterTest, FormatUrlRoundTripQueryEscaped) { |
} |
TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
- CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, |
+ CheckAdjustedOffsets(std::string(), kFormatUrlOmitNothing, |
net::UnescapeRule::NORMAL, NULL); |
const size_t basic_offsets[] = { |
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, |
21, 22, 23, 24, 25 |
}; |
- CheckAdjustedOffsets("http://www.google.com/foo/", "en", |
+ CheckAdjustedOffsets("http://www.google.com/foo/", |
kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
basic_offsets); |
@@ -876,7 +610,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, |
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 |
}; |
- CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", |
+ CheckAdjustedOffsets("http://foo:bar@www.google.com/", |
kFormatUrlOmitUsernamePassword, |
net::UnescapeRule::NORMAL, omit_auth_offsets_1); |
@@ -884,7 +618,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, |
15, 16, 17, 18, 19, 20, 21 |
}; |
- CheckAdjustedOffsets("http://foo@www.google.com/", "en", |
+ CheckAdjustedOffsets("http://foo@www.google.com/", |
kFormatUrlOmitUsernamePassword, |
net::UnescapeRule::NORMAL, omit_auth_offsets_2); |
@@ -895,7 +629,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
30, 31 |
}; |
// Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". |
- CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", |
+ CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", |
kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
dont_omit_auth_offsets); |
@@ -903,7 +637,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, |
kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 |
}; |
- CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", |
+ CheckAdjustedOffsets("view-source:http://foo@www.google.com/", |
kFormatUrlOmitUsernamePassword, |
net::UnescapeRule::NORMAL, view_source_offsets); |
@@ -913,7 +647,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
13, 14, 15, 16, 17, 18, 19 |
}; |
// Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". |
- CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", |
+ CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", |
kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
idn_hostname_offsets_1); |
@@ -926,7 +660,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
// Convert punycode to |
// "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". |
CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", |
- "zh-CN", kFormatUrlOmitNothing, |
+ kFormatUrlOmitNothing, |
net::UnescapeRule::NORMAL, idn_hostname_offsets_2); |
const size_t unescape_offsets[] = { |
@@ -939,7 +673,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
// Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". |
CheckAdjustedOffsets( |
"http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
- "en", kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); |
+ kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); |
const size_t ref_offsets[] = { |
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, |
@@ -948,20 +682,20 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
}; |
// Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". |
CheckAdjustedOffsets( |
- "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", |
+ "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", |
kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets); |
const size_t omit_http_offsets[] = { |
0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, |
10, 11, 12, 13, 14 |
}; |
- CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, |
+ CheckAdjustedOffsets("http://www.google.com/", kFormatUrlOmitHTTP, |
net::UnescapeRule::NORMAL, omit_http_offsets); |
const size_t omit_http_start_with_ftp_offsets[] = { |
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 |
}; |
- CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, |
+ CheckAdjustedOffsets("http://ftp.google.com/", kFormatUrlOmitHTTP, |
net::UnescapeRule::NORMAL, |
omit_http_start_with_ftp_offsets); |
@@ -969,7 +703,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, |
0, 1, 2, 3, 4, 5, 6, 7 |
}; |
- CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, |
+ CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, |
net::UnescapeRule::NORMAL, omit_all_offsets); |
} |