| OLD | NEW | 
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" | 
| 6 | 6 | 
| 7 #include <stddef.h> | 7 #include <stddef.h> | 
| 8 #include <string.h> | 8 #include <string.h> | 
| 9 | 9 | 
| 10 #include <vector> | 10 #include <vector> | 
| 11 | 11 | 
| 12 #include "base/macros.h" | 12 #include "base/macros.h" | 
| 13 #include "base/strings/string_number_conversions.h" | 13 #include "base/strings/string_number_conversions.h" | 
| 14 #include "base/strings/stringprintf.h" | 14 #include "base/strings/stringprintf.h" | 
| 15 #include "base/strings/utf_string_conversions.h" | 15 #include "base/strings/utf_string_conversions.h" | 
| 16 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" | 
| 17 #include "url/gurl.h" | 17 #include "url/gurl.h" | 
| 18 | 18 | 
| 19 | 19 | 
| 20 namespace url_formatter { | 20 namespace url_formatter { | 
| 21 | 21 | 
| 22 namespace { | 22 namespace { | 
| 23 | 23 | 
| 24 using base::WideToUTF16; | 24 using base::WideToUTF16; | 
| 25 using base::ASCIIToUTF16; | 25 using base::ASCIIToUTF16; | 
| 26 | 26 | 
| 27 const size_t kNpos = base::string16::npos; | 27 const size_t kNpos = base::string16::npos; | 
| 28 | 28 | 
| 29 const char* const kLanguages[] = { |  | 
| 30   "",      "en",    "zh-CN",    "ja",    "ko", |  | 
| 31   "he",    "ar",    "ru",       "el",    "fr", |  | 
| 32   "de",    "pt",    "sv",       "th",    "hi", |  | 
| 33   "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en", |  | 
| 34   "zh,ru,en" |  | 
| 35 }; |  | 
| 36 |  | 
| 37 struct IDNTestCase { | 29 struct IDNTestCase { | 
| 38   const char* const input; | 30   const char* const input; | 
| 39   const wchar_t* unicode_output; | 31   const wchar_t* unicode_output; | 
| 40   const bool unicode_allowed[arraysize(kLanguages)]; | 32   const bool unicode_allowed; | 
| 41 }; | 33 }; | 
| 42 | 34 | 
| 43 // TODO(jungshik) This is just a random sample of languages and is far |  | 
| 44 // from exhaustive.  We may have to generate all the combinations |  | 
| 45 // of languages (powerset of a set of all the languages). |  | 
| 46 const IDNTestCase idn_cases[] = { | 35 const IDNTestCase idn_cases[] = { | 
| 47   // No IDN | 36   // No IDN | 
| 48   {"www.google.com", L"www.google.com", | 37   {"www.google.com", L"www.google.com", true}, | 
| 49    {true,  true,  true,  true,  true, | 38   {"www.google.com.", L"www.google.com.", true}, | 
| 50     true,  true,  true,  true,  true, | 39   {".", L".", true}, | 
| 51     true,  true,  true,  true,  true, | 40   {"", L"", true}, | 
| 52     true,  true,  true,  true,  true, |  | 
| 53     true}}, |  | 
| 54   {"www.google.com.", L"www.google.com.", |  | 
| 55    {true,  true,  true,  true,  true, |  | 
| 56     true,  true,  true,  true,  true, |  | 
| 57     true,  true,  true,  true,  true, |  | 
| 58     true,  true,  true,  true,  true, |  | 
| 59     true}}, |  | 
| 60   {".", L".", |  | 
| 61    {true,  true,  true,  true,  true, |  | 
| 62     true,  true,  true,  true,  true, |  | 
| 63     true,  true,  true,  true,  true, |  | 
| 64     true,  true,  true,  true,  true, |  | 
| 65     true}}, |  | 
| 66   {"", L"", |  | 
| 67    {true,  true,  true,  true,  true, |  | 
| 68     true,  true,  true,  true,  true, |  | 
| 69     true,  true,  true,  true,  true, |  | 
| 70     true,  true,  true,  true,  true, |  | 
| 71     true}}, |  | 
| 72   // IDN | 41   // IDN | 
| 73   // Hanzi (Traditional Chinese) | 42   // Hanzi (Traditional Chinese) | 
| 74   {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", | 43   {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true}, | 
| 75    {true,  false, true,  true,  false, | 44   // Hanzi ('video' in Simplified Chinese) | 
| 76     false, false, false, false, false, | 45   {"xn--cy2a840a.com", L"\x89c6\x9891.com", true}, | 
| 77     false, false, false, false, false, |  | 
| 78     false, false, true,  true,  false, |  | 
| 79     true}}, |  | 
| 80   // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh) |  | 
| 81   {"xn--cy2a840a.com", L"\x89c6\x9891.com", |  | 
| 82    {true,  false, true,  false,  false, |  | 
| 83     false, false, false, false, false, |  | 
| 84     false, false, false, false, false, |  | 
| 85     false, false, false, false,  false, |  | 
| 86     true}}, |  | 
| 87   // Hanzi + '123' | 46   // Hanzi + '123' | 
| 88   {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", | 47   {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", true}, | 
| 89    {true,  false, true,  true,  false, | 48   // Hanzi + Latin : U+56FD is simplified | 
| 90     false, false, false, false, false, | 49   {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", true}, | 
| 91     false, false, false, false, false, |  | 
| 92     false, false, true,  true,  false, |  | 
| 93     true}}, |  | 
| 94   // Hanzi + Latin : U+56FD is simplified and is regarded |  | 
| 95   // as not supported in zh-TW. |  | 
| 96   {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", |  | 
| 97    {false, false, true,  true,  false, |  | 
| 98     false, false, false, false, false, |  | 
| 99     false, false, false, false, false, |  | 
| 100     false, false, false, true,  false, |  | 
| 101     true}}, |  | 
| 102   // Kanji + Kana (Japanese) | 50   // Kanji + Kana (Japanese) | 
| 103   {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", | 51   {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", true}, | 
| 104    {true,  false, false, true,  false, |  | 
| 105     false, false, false, false, false, |  | 
| 106     false, false, false, false, false, |  | 
| 107     false, false, false, true,  false, |  | 
| 108     false}}, |  | 
| 109   // Katakana including U+30FC | 52   // Katakana including U+30FC | 
| 110   {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", | 53   {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", true}, | 
| 111    {true, false, false, true,  false, | 54   {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", true}, | 
| 112     false, false, false, false, false, |  | 
| 113     false, false, false, false, false, |  | 
| 114     false, false, false, true, false, |  | 
| 115     }}, |  | 
| 116   {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", |  | 
| 117    {true, false, false, true,  false, |  | 
| 118     false, false, false, false, false, |  | 
| 119     false, false, false, false, false, |  | 
| 120     false, false, false, true, false, |  | 
| 121     }}, |  | 
| 122   // Katakana + Latin (Japanese) | 55   // Katakana + Latin (Japanese) | 
| 123   // TODO(jungshik): Change 'false' in the first element to 'true' | 56   {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", true}, | 
| 124   // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead | 57   {"xn--3bkxe.jp", L"\x30c8\x309a.jp", true}, | 
| 125   // of our IsIDNComponentInSingleScript(). |  | 
| 126   {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", |  | 
| 127    {false, false, false, true,  false, |  | 
| 128     false, false, false, false, false, |  | 
| 129     false, false, false, false, false, |  | 
| 130     false, false, false, true, false, |  | 
| 131     }}, |  | 
| 132   {"xn--3bkxe.jp", L"\x30c8\x309a.jp", |  | 
| 133    {false, false, false, true,  false, |  | 
| 134     false, false, false, false, false, |  | 
| 135     false, false, false, false, false, |  | 
| 136     false, false, false, true, false, |  | 
| 137     }}, |  | 
| 138   // Hangul (Korean) | 58   // Hangul (Korean) | 
| 139   {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", | 59   {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", true}, | 
| 140    {true,  false, false, false, true, |  | 
| 141     false, false, false, false, false, |  | 
| 142     false, false, false, false, false, |  | 
| 143     false, false, false, true,  false, |  | 
| 144     false}}, |  | 
| 145   // b<u-umlaut>cher (German) | 60   // b<u-umlaut>cher (German) | 
| 146   {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", | 61   {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", true}, | 
| 147    {true,  false, false, false, false, |  | 
| 148     false, false, false, false, true, |  | 
| 149     true,  false,  false, false, false, |  | 
| 150     true,  false, false, false, false, |  | 
| 151     false}}, |  | 
| 152   // a with diaeresis | 62   // a with diaeresis | 
| 153   {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", | 63   {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", true}, | 
| 154    {true,  false, false, false, false, |  | 
| 155     false, false, false, false, false, |  | 
| 156     true,  false, true, false, false, |  | 
| 157     true,  false, false, false, false, |  | 
| 158     false}}, |  | 
| 159   // c-cedilla (French) | 64   // c-cedilla (French) | 
| 160   {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", | 65   {"www.xn--alliancefranaise-npb.fr", | 
| 161    {true,  false, false, false, false, | 66    L"www.alliancefran\x00e7" L"aise.fr", true}, | 
| 162     false, false, false, false, true, |  | 
| 163     false, true,  false, false, false, |  | 
| 164     false, false, false, false, false, |  | 
| 165     false}}, |  | 
| 166   // caf'e with acute accent' (French) | 67   // caf'e with acute accent' (French) | 
| 167   {"xn--caf-dma.fr", L"caf\x00e9.fr", | 68   {"xn--caf-dma.fr", L"caf\x00e9.fr", true}, | 
| 168    {true,  false, false, false, false, |  | 
| 169     false, false, false, false, true, |  | 
| 170     false, true,  true,  false, false, |  | 
| 171     false, false, false, false, false, |  | 
| 172     false}}, |  | 
| 173   // c-cedilla and a with tilde (Portuguese) | 69   // c-cedilla and a with tilde (Portuguese) | 
| 174   {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", | 70   {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", true}, | 
| 175    {true,  false, false, false, false, |  | 
| 176     false, false, false, false, false, |  | 
| 177     false, true,  false, false, false, |  | 
| 178     false, false, false, false, false, |  | 
| 179     false}}, |  | 
| 180   // s with caron | 71   // s with caron | 
| 181   {"xn--achy-f6a.com", L"\x0161" L"achy.com", | 72   {"xn--achy-f6a.com", L"\x0161" L"achy.com", true}, | 
| 182    {true,  false, false, false, false, |  | 
| 183     false, false, false, false, false, |  | 
| 184     false, false, false, false, false, |  | 
| 185     false, false, false, false, false, |  | 
| 186     false}}, |  | 
| 187   // TODO(jungshik) : Add examples with Cyrillic letters |  | 
| 188   // only used in some languages written in Cyrillic. |  | 
| 189   // Eutopia (Greek) |  | 
| 190   {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", | 73   {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", | 
| 191    {true,  false, false, false, false, | 74    true}, | 
| 192     false, false, false, true,  false, |  | 
| 193     false, false, false, false, false, |  | 
| 194     false, true,  false, false, false, |  | 
| 195     false}}, |  | 
| 196   // Eutopia + 123 (Greek) | 75   // Eutopia + 123 (Greek) | 
| 197   {"xn---123-pldm0haj2bk.gr", | 76   {"xn---123-pldm0haj2bk.gr", | 
| 198    L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", | 77    L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", true}, | 
| 199    {true,  false, false, false, false, |  | 
| 200     false, false, false, true,  false, |  | 
| 201     false, false, false, false, false, |  | 
| 202     false, true,  false, false, false, |  | 
| 203     false}}, |  | 
| 204   // Cyrillic (Russian) | 78   // Cyrillic (Russian) | 
| 205   {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", | 79   {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", true}, | 
| 206    {true,  false, false, false, false, |  | 
| 207     false, false, true,  false, false, |  | 
| 208     false, false, false, false, false, |  | 
| 209     false, false, false, false, true, |  | 
| 210     true}}, |  | 
| 211   // Cyrillic + 123 (Russian) | 80   // Cyrillic + 123 (Russian) | 
| 212   {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", | 81   {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", true}, | 
| 213    {true,  false, false, false, false, | 82   // 'president' in Russian. Is a wholescript confusable, but allowed. | 
| 214     false, false, true,  false, false, | 83   {"xn--d1abbgf6aiiy.xn--p1ai", | 
| 215     false, false, false, false, false, | 84     L"\x043f\x0440\x0435\x0437\x0438\x0434\x0435\x043d\x0442.\x0440\x0444", | 
| 216     false, false, false, false, true, | 85     true}, | 
| 217     true}}, |  | 
| 218   // Arabic | 86   // Arabic | 
| 219   {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", | 87   {"xn--mgba1fmg.eg", L"\x0627\x0641\x0644\x0627\x0645.eg", true}, | 
| 220    {true,  false, false, false, false, |  | 
| 221     false, true,  false, false, false, |  | 
| 222     false, false, false, false, false, |  | 
| 223     false, false, false, false, false, |  | 
| 224     false}}, |  | 
| 225   // Hebrew | 88   // Hebrew | 
| 226   {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", | 89   {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", true}, | 
| 227    {true,  false, false, false, false, | 90   // Hebrew + Common | 
| 228     true,  false, false, false, false, | 91   {"xn---123-ptf2c5c6bt.il", L"\x05e2\x05d1\x05e8\x05d9\x05ea-123.il", true}, | 
| 229     false, false, false, false, false, |  | 
| 230     false, false, false, false, true, |  | 
| 231     false}}, |  | 
| 232   // Thai | 92   // Thai | 
| 233   {"xn--12c2cc4ag3b4ccu.th", | 93   {"xn--12c2cc4ag3b4ccu.th", | 
| 234    L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", | 94    L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true}, | 
| 235    {true,  false, false, false, false, | 95   // Thai + Common | 
| 236     false, false, false, false, false, | 96   {"xn---123-9goxcp8c9db2r.th", | 
| 237     false, false, false, true,  false, | 97     L"\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22-123.th", true}, | 
| 238     false, false, false, false, false, |  | 
| 239     false}}, |  | 
| 240   // Devanagari (Hindi) | 98   // Devanagari (Hindi) | 
| 241   {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", | 99   {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true}, | 
| 242    {true,  false, false, false, false, | 100   // Devanagari + Common | 
| 243     false, false, false, false, false, | 101   {"xn---123-kbjl2j0bl2k.in", | 
| 244     false, false, false, false, true, | 102     L"\x0939\x093f\x0928\x094d\x0926\x0940-123.in", true}, | 
| 245     false, false, false, false, false, | 103 | 
| 246     false}}, | 104   // 5 Aspirational scripts | 
| 247   // Invalid IDN | 105   // Unified Canadian Syllabary | 
| 248   {"xn--hello?world.com", NULL, | 106   {"xn--dfe0tte.ca", L"\x1456\x14c2\x14ef.ca", true}, | 
| 249    {false, false, false, false, false, | 107   // Tifinagh | 
| 250     false, false, false, false, false, | 108   {"xn--4ljxa2bb4a6bxb.ma", | 
| 251     false, false, false, false, false, | 109     L"\x2d5c\x2d49\x2d3c\x2d49\x2d4f\x2d30\x2d56.ma", true}, | 
| 252     false, false, false, false, false, | 110   // Tifinagh with a disallowed character(U+2D6F) | 
| 253     false}}, | 111   {"xn--hmjzaby5d5f.ma", L"\x2d5c\x2d49\x2d3c\x2d6f\x2d49\x2d4f.ma", false}, | 
| 254   // Unsafe IDNs | 112   // Yi | 
|  | 113   {"xn--4o7a6e1x64c.cn", L"\xa188\xa320\xa071\xa0b7.cn", true}, | 
|  | 114   // Mongolian - 'ordu' (place, camp) | 
|  | 115   {"xn--56ec8bp.cn", L"\x1823\x1837\x1833\x1824.cn", true}, | 
|  | 116   // Mongolian with a disallowed character | 
|  | 117   {"xn--95e5de3ds.cn", L"\x1823\x1837\x1804\x1833\x1824.cn", false}, | 
|  | 118   // Miao/Pollard | 
|  | 119   {"xn--2u0fpf0a.cn", L"\U00016f04\U00016f62\U00016f59.cn", true}, | 
|  | 120 | 
|  | 121   // Script mixing tests | 
|  | 122   // The following script combinations are allowed. | 
|  | 123   // MODERATELY_RESTRICTIVE with Latin limited to ASCII-Latin. | 
|  | 124   // ASCII-Latin + Japn (Kana + Han) | 
|  | 125   // ASCII-Latin + Kore (Hangul + Han) | 
|  | 126   // ASCII-Latin + Han + Bopomofo | 
|  | 127   // ASCII-Latin + any allowed script other than Cyrillic, Greek and Cherokee | 
| 255   // "payp<alpha>l.com" | 128   // "payp<alpha>l.com" | 
| 256   {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", | 129   {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false}, | 
| 257    {false, false, false, false, false, |  | 
| 258     false, false, false, false, false, |  | 
| 259     false, false, false, false, false, |  | 
| 260     false, false, false, false, false, |  | 
| 261     false}}, |  | 
| 262   // google.gr with Greek omicron and epsilon | 130   // google.gr with Greek omicron and epsilon | 
| 263   {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", | 131   {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", false}, | 
| 264    {false, false, false, false, false, |  | 
| 265     false, false, false, false, false, |  | 
| 266     false, false, false, false, false, |  | 
| 267     false, false, false, false, false, |  | 
| 268     false}}, |  | 
| 269   // google.ru with Cyrillic o | 132   // google.ru with Cyrillic o | 
| 270   {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", | 133   {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", false}, | 
| 271    {false, false, false, false, false, |  | 
| 272     false, false, false, false, false, |  | 
| 273     false, false, false, false, false, |  | 
| 274     false, false, false, false, false, |  | 
| 275     false}}, |  | 
| 276   // h<e with acute>llo<China in Han>.cn | 134   // h<e with acute>llo<China in Han>.cn | 
| 277   {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", | 135   {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", false}, | 
| 278    {false, false, false, false, false, |  | 
| 279     false, false, false, false, false, |  | 
| 280     false, false, false, false, false, |  | 
| 281     false, false, false, false, false, |  | 
| 282     false}}, |  | 
| 283   // <Greek rho><Cyrillic a><Cyrillic u>.ru | 136   // <Greek rho><Cyrillic a><Cyrillic u>.ru | 
| 284   {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", | 137   {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", false}, | 
| 285    {false, false, false, false, false, | 138   // Hangul + Latin | 
| 286     false, false, false, false, false, | 139   {"xn--han-eb9ll88m.kr", L"\xd55c\xae00han.kr", true}, | 
| 287     false, false, false, false, false, | 140   // Hangul + Latin + Han with IDN ccTLD | 
| 288     false, false, false, false, false, | 141   {"xn--han-or0kq92gkm3c.xn--3e0b707e", | 
| 289     false}}, | 142     L"\xd55c\xae00han\x97d3.\xd55c\xad6d", true}, | 
|  | 143   // non-ASCII Latin + Hangul | 
|  | 144   {"xn--caf-dma9024xvpg.kr", L"caf\x00e9\xce74\xd398.kr", false}, | 
|  | 145   // Hangul + Hiragana | 
|  | 146   {"xn--y9j3b9855e.kr", L"\xd55c\x3072\x3089.kr", false}, | 
|  | 147   // <Hiragana>.<Hangul> is allowed because script mixing check is per label. | 
|  | 148   {"xn--y9j3b.xn--3e0b707e", L"\x3072\x3089.\xd55c\xad6d", true}, | 
|  | 149   //  Traditional Han + Latin | 
|  | 150   {"xn--hanzi-u57ii69i.tw", L"\x6f22\x5b57hanzi.tw", true}, | 
|  | 151   //  Simplified Han + Latin | 
|  | 152   {"xn--hanzi-u57i952h.cn", L"\x6c49\x5b57hanzi.cn", true}, | 
|  | 153   // Simplified Han + Traditional Han | 
|  | 154   {"xn--hanzi-if9kt8n.cn", L"\x6c49\x6f22hanzi.cn", true}, | 
|  | 155   //  Han + Hiragana + Katakana + Latin | 
|  | 156   {"xn--kanji-ii4dpizfq59yuykqr4b.jp", | 
|  | 157     L"\x632f\x308a\x4eee\x540d\x30ab\x30bfkanji.jp", true}, | 
|  | 158   // Han + Bopomofo | 
|  | 159   {"xn--5ekcde0577e87tc.tw", L"\x6ce8\x97f3\x3105\x3106\x3107\x3108.tw", true}, | 
|  | 160   // Han + Latin + Bopomofo | 
|  | 161   {"xn--bopo-ty4cghi8509kk7xd.tw", | 
|  | 162     L"\x6ce8\x97f3" L"bopo\x3105\x3106\x3107\x3108.tw", true}, | 
|  | 163   // Latin + Bopomofo | 
|  | 164   {"xn--bopomofo-hj5gkalm.tw", L"bopomofo\x3105\x3106\x3107\x3108.tw", true}, | 
|  | 165   // Bopomofo + Katakana | 
|  | 166   {"xn--lcka3d1bztghi.tw", | 
|  | 167     L"\x3105\x3106\x3107\x3108\x30ab\x30bf\x30ab\x30ca.tw", false}, | 
|  | 168   //  Bopomofo + Hangul | 
|  | 169   {"xn--5ekcde4543qbec.tw", L"\x3105\x3106\x3107\x3108\xc8fc\xc74c.tw", false}, | 
|  | 170   // Devanagari + Latin | 
|  | 171   {"xn--ab-3ofh8fqbj6h.in", L"ab\x0939\x093f\x0928\x094d\x0926\x0940.in", true}, | 
|  | 172   // Thai + Latin | 
|  | 173   {"xn--ab-jsi9al4bxdb6n.th", | 
|  | 174     L"ab\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22.th", true}, | 
|  | 175   // <vitamin in Katakana>b1.com | 
|  | 176   {"xn--b1-xi4a7cvc9f.com", L"\x30d3\x30bf\x30df\x30f3" L"b1.com", true}, | 
|  | 177   // Devanagari + Han | 
|  | 178   {"xn--t2bes3ds6749n.com", L"\x0930\x094b\x0932\x0947\x76e7\x0938.com", false}, | 
|  | 179   // Devanagari + Bengali | 
|  | 180   {"xn--11b0x.in", L"\x0915\x0995.in", false}, | 
|  | 181 | 
|  | 182   // Invisibility check | 
|  | 183   // Thai tone mark malek(U+0E48) repeated | 
|  | 184   {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false}, | 
|  | 185   // Acute accent repeated | 
|  | 186   {"xn--a-xbba.com", L"a\x0301\x0301.com", false}, | 
|  | 187   // 'a' with acute accent + another acute accent | 
|  | 188   {"xn--1ca20i.com", L"\x00e1\x0301.com", false}, | 
|  | 189 | 
|  | 190   // Mixed script confusable | 
|  | 191   // google with Armenian Small Letter Oh(U+0585) | 
|  | 192   {"xn--gogle-lkg.com", L"g\x0585ogle.com", false}, | 
|  | 193   // Hiragana HE(U+3078) mixed with Katakana | 
|  | 194   {"xn--49jxi3as0d0fpc.com", | 
|  | 195     L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false}, | 
|  | 196   // U+30FC + Han | 
|  | 197   {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false}, | 
|  | 198   // Han + U+30FC + Han | 
|  | 199   {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false}, | 
|  | 200   // Latin + U+30FC + Latin | 
|  | 201   {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false}, | 
|  | 202   // Latin + U+30FB + Latin | 
|  | 203   {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false}, | 
|  | 204   // U+30FB + Latin | 
|  | 205   {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false}, | 
|  | 206 | 
|  | 207   // Mixed digits: the first two will also fail mixed script test | 
|  | 208   // Latin + ASCII digit + Deva digit | 
|  | 209   {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false}, | 
|  | 210   // Latin + Deva digit + Beng digit | 
|  | 211   {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false}, | 
|  | 212   // ASCII digit + Deva digit | 
|  | 213   {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false}, | 
|  | 214   //  Deva digit + Beng digit | 
|  | 215   {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false}, | 
|  | 216   // U+4E00 (CJK Ideograph One) is not a digit | 
|  | 217   {"xn--d12-s18d.cn", L"d12\x4e00.cn", true}, | 
| 290   // One that's really long that will force a buffer realloc | 218   // One that's really long that will force a buffer realloc | 
| 291   {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | 219   {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | 
| 292        "aaaaaaa", | 220    "aaaaaaa", | 
| 293    L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | 221    L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | 
| 294        L"aaaaaaaa", | 222    L"aaaaaaaa", | 
| 295    {true,  true,  true,  true,  true, | 223    true}, | 
| 296     true,  true,  true,  true,  true, | 224 | 
| 297     true,  true,  true,  true,  true, | 225   // Not allowed; characters outside [:Identifier_Status=Allowed:] | 
| 298     true,  true,  true,  true,  true, | 226   // Limited Use Scripts: UTS 31 Table 7. | 
| 299     true}}, | 227   // Vai | 
| 300   // Test cases for characters we blacklisted although allowed in IDN. | 228   {"xn--sn8a.com", L"\xa50b.com", false}, | 
| 301   // Embedded spaces will be turned to %20 in the display. | 229   // 'CARD' look-alike in Cherokee | 
| 302   // TODO(jungshik): We need to have more cases. This is a typical | 230   {"xn--58db0a9q.com", L"\x13df\x13aa\x13a1\x13a0.com", false}, | 
| 303   // data-driven trap. The following test cases need to be separated | 231   // Scripts excluded from Identifiers: UTS 31 Table 4 | 
| 304   // and tested only for a couple of languages. | 232   // Coptic | 
| 305   {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", | 233   {"xn--5ya.com", L"\x03e7.com", false}, | 
| 306     {false, false, false, false, false, | 234   // Old Italic | 
| 307      false, false, false, false, false, | 235   {"xn--097cc.com", L"\U00010300\U00010301.com", false}, | 
| 308      false, false, false, false, false, | 236 | 
| 309      false, false, false, false, false, | 237   // U+115F (Hangul Filler) | 
| 310      false}}, | 238   {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false}, | 
| 311   {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", | 239   {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false}, | 
| 312     {false, false, false, false, false, | 240   // Latin small capital w | 
| 313      false, false, false, false, false, | 241   {"xn--hardare-l41c.com", L"hard\x1d21" L"are.com", false}, | 
| 314      false, false, false, false, false, | 242   // Minus Sign(U+2212) | 
| 315      false, false, false, false, false, | 243   {"xn--t9g238xc2a.jp", L"\x65e5\x2212\x672c.jp", false}, | 
| 316   }}, | 244   // Latin Small Letter Script G | 
| 317   {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", | 245   {"xn--0naa.com", L"\x0261\x0261.com", false}, | 
| 318     {false, false, false, false, false, | 246   // Hangul Jamo(U+11xx) | 
| 319      false, false, false, false, false, | 247   {"xn--0pdc3b.com", L"\x1102\x1103\x1110.com", false}, | 
| 320      false, false, false, false, false, | 248   // degree sign | 
| 321      false, false, false, false, false, | 249   {"xn--36c-tfa.com", L"36\x00b0" L"c.com", false}, | 
| 322   }}, | 250   // Pound sign | 
| 323   {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", | 251   {"xn--5free-9ga.com", L"5free\x00a8.com", false}, | 
| 324     {false, false, false, false, false, | 252   // Hebrew points (U+05B0, U+05B6) | 
| 325      false, false, false, false, false, | 253   {"xn--7cbl2kc2a.com", L"\x05e1\x05b6\x05e7\x05b0\x05e1.com", false}, | 
| 326      false, false, false, false, false, | 254   // Danda(U+0964) | 
| 327      false, false, false, false, false, | 255   {"xn--81bp1b6ch8s.com", L"\x0924\x093f\x091c\x0964\x0930\x0940.com", false}, | 
| 328   }}, | 256   // Small letter script G(U+0261) | 
| 329   {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", | 257   {"xn--oogle-qmc.com", L"\x0261oogle.com", false}, | 
| 330     {false, false, false, false, false, | 258   // Small Katakana Extension(U+31F1) | 
| 331      false, false, false, false, false, | 259   {"xn--wlk.com", L"\x31f1.com", false}, | 
| 332      false, false, false, false, false, | 260   // Heart symbol | 
| 333      false, false, false, false, false, | 261   {"xn--ab-u0x.com", L"ab\x2665.com", false}, | 
| 334   }}, | 262   // Emoji | 
|  | 263   {"xn--vi8hiv.xyz", L"\U0001f355\U0001f4a9.xyz", false}, | 
|  | 264   // Registered trade mark | 
|  | 265   {"xn--egistered-fna.com", L"\x00ae" L"egistered.com", false}, | 
|  | 266   // Latin Letter Retroflex Click | 
|  | 267   {"xn--registered-25c.com", L"registered\x01c3.com", false}, | 
|  | 268   // ASCII '!' not allowed in IDN | 
|  | 269   {"xn--!-257eu42c.kr", L"\xc548\xb155!.kr", false}, | 
|  | 270   // 'GOOGLE' in IPA extension | 
|  | 271   {"xn--1naa7pn51hcbaa.com", | 
|  | 272     L"\x0262\x1d0f\x1d0f\x0262\x029f\x1d07.com", false}, | 
| 335   // Padlock icon spoof. | 273   // Padlock icon spoof. | 
| 336   {"xn--google-hj64e", L"\U0001f512google.com", | 274   {"xn--google-hj64e", L"\U0001f512google.com", false}, | 
| 337     {false, false, false, false, false, | 275 | 
| 338      false, false, false, false, false, | 276   // Custom black list | 
| 339      false, false, false, false, false, | 277   // Combining Long Solidus Overlay | 
| 340      false, false, false, false, false, | 278   {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", false}, | 
| 341   }}, | 279   // Hyphenation Point instead of Katakana Middle dot | 
| 342   // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist | 280   {"xn--svgy16dha.jp", L"\x30a1\x2027\x30a3.jp", false}, | 
| 343   // all strings with the surrogate '\xdd12'. | 281   // Gershayim with other Hebrew characters is allowed. | 
| 344   {"xn--fk9c.com", L"\U00010912.com", | 282   {"xn--5db6bh9b.il", L"\x05e9\x05d1\x05f4\x05e6.il", true}, | 
| 345     {true,  false, false, false, false, | 283   // Hebrew Gershayim with Latin is disallowed. | 
| 346      false, false, false, false, false, | 284   {"xn--ab-yod.com", L"a\x05f4" L"b.com", false}, | 
| 347      false, false, false, false, false, | 285   // Hebrew Gershayim with Arabic is disallowed. | 
| 348      false, false, false, false, false, | 286   {"xn--5eb7h.eg", L"\x0628\x05f4.eg", false}, | 
| 349   }}, | 287 | 
| 350 #if 0 | 288   // Custom dangerous patterns | 
| 351   // These two cases are special. We need a separate test. | 289   // Two Katakana-Hiragana combining mark in a row | 
| 352   // U+3000 and U+3002 are normalized to ASCII space and dot. | 290   {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", false}, | 
| 353   {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", | 291   // Katakana Letter No not enclosed by {Han,Hiragana,Katakana}. | 
| 354     {false, false, true,  false, false, | 292   {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", false}, | 
| 355      false, false, false, false, false, | 293   // TODO(jshin): Review the danger of allowing the following two. | 
| 356      false, false, false, false, false, | 294   // Katakana 'No' by itself is allowed. | 
| 357      false, false, true,  false, false, | 295   {"xn--ldk.jp", L"\x30ce.jp", true}, | 
| 358      true}}, | 296   // Hebrew Gershayim used by itself is allowed. | 
| 359   {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", | 297   {"xn--5eb.il", L"\x05f4.il", true}, | 
| 360     {false, false, true,  false, false, | 298 | 
| 361      false, false, false, false, false, | 299   // 4 Deviation characters between IDNA 2003 and IDNA 2008 | 
| 362      false, false, false, false, false, | 300   // When entered in Unicode, the first two are mapped to 'ss' and Greek sigma | 
| 363      false, false, true,  false, false, | 301   // and the latter two are mapped away. However, the punycode form should | 
| 364      true}}, | 302   // remain in punycode. | 
| 365 #endif | 303   // U+00DF(sharp-s) | 
|  | 304   {"xn--fu-hia.de", L"fu\x00df.de", false}, | 
|  | 305   // U+03C2(final-sigma) | 
|  | 306   {"xn--mxac2c.gr", L"\x03b1\x03b2\x03c2.gr", false}, | 
|  | 307   // U+200C(ZWNJ) | 
|  | 308   {"xn--h2by8byc123p.in", L"\x0924\x094d\x200c\x0930\x093f.in", false}, | 
|  | 309   // U+200D(ZWJ) | 
|  | 310   {"xn--11b6iy14e.in", L"\x0915\x094d\x200d.in", false}, | 
|  | 311 | 
|  | 312   // Math Monospace Small A. When entered in Unicode, it's canonicalized to | 
|  | 313   // 'a'. The punycode form should remain in punycode. | 
|  | 314   {"xn--bc-9x80a.xyz", L"\U0001d68a" L"bc.xyz", false}, | 
|  | 315   // Math Sans Bold Capital Alpha | 
|  | 316   {"xn--bc-rg90a.xyz", L"\U0001d756" L"bc.xyz", false}, | 
|  | 317   // U+3000 is canonicalized to a space(U+0020), but the punycode form | 
|  | 318   // should remain in punycode. | 
|  | 319   {"xn--p6j412gn7f.cn", L"\x4e2d\x56fd\x3000", false}, | 
|  | 320   // U+3002 is canonicalized to ASCII fullstop(U+002E), but the punycode form | 
|  | 321   // should remain in punycode. | 
|  | 322   {"xn--r6j012gn7f.cn", L"\x4e2d\x56fd\x3002", false}, | 
|  | 323   // Invalid punycode | 
|  | 324   // Has a codepoint beyond U+10FFFF. | 
|  | 325   {"xn--krank-kg706554a", nullptr, false}, | 
|  | 326   // '?' in punycode. | 
|  | 327   {"xn--hello?world.com", nullptr, false}, | 
|  | 328 | 
|  | 329   // Not allowed in UTS46/IDNA 2008 | 
|  | 330   // Georgian Capital Letter(U+10BD) | 
|  | 331   {"xn--1nd.com", L"\x10bd.com", false}, | 
|  | 332   // 3rd and 4th characters are '-'. | 
|  | 333   {"xn-----8kci4dhsd", L"\x0440\x0443--\x0430\x0432\x0442\x043e", false}, | 
|  | 334   // Leading combining mark | 
|  | 335   {"xn--72b.com", L"\x093e.com", false}, | 
|  | 336   // BiDi check per IDNA 2008/UTS 46 | 
|  | 337   // Cannot start with AN(Arabic-Indic Number) | 
|  | 338   {"xn--8hbae.eg", L"\x0662\x0660\x0660.eg", false}, | 
|  | 339   // Cannot start with an RTL character and end with an LTR character | 
|  | 340   {"xn--x-ymcov.eg", L"\x062c\x0627\x0631x.eg", false}, | 
|  | 341   // Can start with an RTL character and end with EN(European Number) | 
|  | 342   {"xn--2-ymcov.eg", L"\x062c\x0627\x0631" L"2.eg", true}, | 
|  | 343   // Can start with an RTL character and end with AN | 
|  | 344   {"xn--mgbjq0r.eg", L"\x062c\x0627\x0631\x0662.eg", true}, | 
| 366 }; | 345 }; | 
| 367 | 346 | 
| 368 struct AdjustOffsetCase { | 347 struct AdjustOffsetCase { | 
| 369   size_t input_offset; | 348   size_t input_offset; | 
| 370   size_t output_offset; | 349   size_t output_offset; | 
| 371 }; | 350 }; | 
| 372 | 351 | 
| 373 struct UrlTestData { | 352 struct UrlTestData { | 
| 374   const char* const description; | 353   const char* const description; | 
| 375   const char* const input; | 354   const char* const input; | 
| 376   const char* const languages; |  | 
| 377   FormatUrlTypes format_types; | 355   FormatUrlTypes format_types; | 
| 378   net::UnescapeRule::Type escape_rules; | 356   net::UnescapeRule::Type escape_rules; | 
| 379   const wchar_t* output;  // Use |wchar_t| to handle Unicode constants easily. | 357   const wchar_t* output;  // Use |wchar_t| to handle Unicode constants easily. | 
| 380   size_t prefix_len; | 358   size_t prefix_len; | 
| 381 }; | 359 }; | 
| 382 | 360 | 
| 383 // A helper for IDN*{Fast,Slow}. |  | 
| 384 // Append "::<language list>" to |expected| and |actual| to make it |  | 
| 385 // easy to tell which sub-case fails without debugging. |  | 
| 386 void AppendLanguagesToOutputs(const char* languages, |  | 
| 387                               base::string16* expected, |  | 
| 388                               base::string16* actual) { |  | 
| 389   base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages); |  | 
| 390   expected->append(to_append); |  | 
| 391   actual->append(to_append); |  | 
| 392 } |  | 
| 393 |  | 
| 394 // A pair of helpers for the FormatUrlWithOffsets() test. | 361 // A pair of helpers for the FormatUrlWithOffsets() test. | 
| 395 void VerboseExpect(size_t expected, | 362 void VerboseExpect(size_t expected, | 
| 396                    size_t actual, | 363                    size_t actual, | 
| 397                    const std::string& original_url, | 364                    const std::string& original_url, | 
| 398                    size_t position, | 365                    size_t position, | 
| 399                    const base::string16& formatted_url) { | 366                    const base::string16& formatted_url) { | 
| 400   EXPECT_EQ(expected, actual) << "Original URL: " << original_url | 367   EXPECT_EQ(expected, actual) << "Original URL: " << original_url | 
| 401       << " (at char " << position << ")\nFormatted URL: " << formatted_url; | 368       << " (at char " << position << ")\nFormatted URL: " << formatted_url; | 
| 402 } | 369 } | 
| 403 | 370 | 
| 404 void CheckAdjustedOffsets(const std::string& url_string, | 371 void CheckAdjustedOffsets(const std::string& url_string, | 
| 405                           const std::string& languages, |  | 
| 406                           FormatUrlTypes format_types, | 372                           FormatUrlTypes format_types, | 
| 407                           net::UnescapeRule::Type unescape_rules, | 373                           net::UnescapeRule::Type unescape_rules, | 
| 408                           const size_t* output_offsets) { | 374                           const size_t* output_offsets) { | 
| 409   GURL url(url_string); | 375   GURL url(url_string); | 
| 410   size_t url_length = url_string.length(); | 376   size_t url_length = url_string.length(); | 
| 411   std::vector<size_t> offsets; | 377   std::vector<size_t> offsets; | 
| 412   for (size_t i = 0; i <= url_length + 1; ++i) | 378   for (size_t i = 0; i <= url_length + 1; ++i) | 
| 413     offsets.push_back(i); | 379     offsets.push_back(i); | 
| 414   offsets.push_back(500000);  // Something larger than any input length. | 380   offsets.push_back(500000);  // Something larger than any input length. | 
| 415   offsets.push_back(std::string::npos); | 381   offsets.push_back(std::string::npos); | 
| 416   base::string16 formatted_url = FormatUrlWithOffsets(url, languages, | 382   base::string16 formatted_url = FormatUrlWithOffsets(url, std::string(), | 
| 417       format_types, unescape_rules, NULL, NULL, &offsets); | 383       format_types, unescape_rules, NULL, NULL, &offsets); | 
| 418   for (size_t i = 0; i < url_length; ++i) | 384   for (size_t i = 0; i < url_length; ++i) | 
| 419     VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); | 385     VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); | 
| 420   VerboseExpect(formatted_url.length(), offsets[url_length], url_string, | 386   VerboseExpect(formatted_url.length(), offsets[url_length], url_string, | 
| 421                 url_length, formatted_url); | 387                 url_length, formatted_url); | 
| 422   VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, | 388   VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, | 
| 423                 500000, formatted_url); | 389                 500000, formatted_url); | 
| 424   VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, | 390   VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, | 
| 425                 std::string::npos, formatted_url); | 391                 std::string::npos, formatted_url); | 
| 426 } | 392 } | 
| 427 | 393 | 
| 428 TEST(UrlFormatterTest, IDNToUnicodeFast) { | 394 TEST(UrlFormatterTest, IDNToUnicode) { | 
| 429   for (size_t i = 0; i < arraysize(idn_cases); i++) { | 395   for (size_t i = 0; i < arraysize(idn_cases); i++) { | 
| 430     for (size_t j = 0; j < arraysize(kLanguages); j++) { | 396     base::string16 output(IDNToUnicode(idn_cases[i].input, std::string())); | 
| 431       // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow | 397     base::string16 expected(idn_cases[i].unicode_allowed | 
| 432       if (j == 3 || j == 17 || j == 18) | 398                                 ? WideToUTF16(idn_cases[i].unicode_output) | 
| 433         continue; | 399                                 : ASCIIToUTF16(idn_cases[i].input)); | 
| 434       base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); | 400     EXPECT_EQ(expected, output) << "input # " << i << ": \"" | 
| 435       base::string16 expected(idn_cases[i].unicode_allowed[j] ? | 401                                 << idn_cases[i].input << "\""; | 
| 436           WideToUTF16(idn_cases[i].unicode_output) : |  | 
| 437           ASCIIToUTF16(idn_cases[i].input)); |  | 
| 438       AppendLanguagesToOutputs(kLanguages[j], &expected, &output); |  | 
| 439       EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input |  | 
| 440                                   << "\", languages: \"" << kLanguages[j] |  | 
| 441                                   << "\""; |  | 
| 442     } |  | 
| 443   } |  | 
| 444 } |  | 
| 445 |  | 
| 446 TEST(UrlFormatterTest, IDNToUnicodeSlow) { |  | 
| 447   for (size_t i = 0; i < arraysize(idn_cases); i++) { |  | 
| 448     for (size_t j = 0; j < arraysize(kLanguages); j++) { |  | 
| 449       // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast |  | 
| 450       if (!(j == 3 || j == 17 || j == 18)) |  | 
| 451         continue; |  | 
| 452       base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); |  | 
| 453       base::string16 expected(idn_cases[i].unicode_allowed[j] ? |  | 
| 454           WideToUTF16(idn_cases[i].unicode_output) : |  | 
| 455           ASCIIToUTF16(idn_cases[i].input)); |  | 
| 456       AppendLanguagesToOutputs(kLanguages[j], &expected, &output); |  | 
| 457       EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input |  | 
| 458                                   << "\", languages: \"" << kLanguages[j] |  | 
| 459                                   << "\""; |  | 
| 460     } |  | 
| 461   } |  | 
| 462 } |  | 
| 463 |  | 
| 464 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and |  | 
| 465 // te), which was causing a crash (See http://crbug.com/510551).  This may be an |  | 
| 466 // icu bug, but regardless, that should not cause a crash. |  | 
| 467 TEST(UrlFormatterTest, IDNToUnicodeNeverCrashes) { |  | 
| 468   for (char c1 = 'a'; c1 <= 'z'; c1++) { |  | 
| 469     for (char c2 = 'a'; c2 <= 'z'; c2++) { |  | 
| 470       std::string lang = base::StringPrintf("%c%c", c1, c2); |  | 
| 471       base::string16 output(IDNToUnicode("xn--74h", lang)); |  | 
| 472     } |  | 
| 473   } | 402   } | 
| 474 } | 403 } | 
| 475 | 404 | 
| 476 TEST(UrlFormatterTest, FormatUrl) { | 405 TEST(UrlFormatterTest, FormatUrl) { | 
| 477   FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; | 406   FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; | 
| 478   const UrlTestData tests[] = { | 407   const UrlTestData tests[] = { | 
| 479       {"Empty URL", "", "", default_format_type, net::UnescapeRule::NORMAL, L"", | 408       {"Empty URL", "", default_format_type, net::UnescapeRule::NORMAL, L"", | 
| 480        0}, | 409        0}, | 
| 481 | 410 | 
| 482       {"Simple URL", "http://www.google.com/", "", default_format_type, | 411       {"Simple URL", "http://www.google.com/",  default_format_type, | 
| 483        net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, | 412        net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, | 
| 484 | 413 | 
| 485       {"With a port number and a reference", | 414       {"With a port number and a reference", | 
| 486        "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, | 415        "http://www.google.com:8080/#\xE3\x82\xB0", default_format_type, | 
| 487        net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, | 416        net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, | 
| 488 | 417 | 
| 489       // -------- IDN tests -------- | 418       // -------- IDN tests -------- | 
| 490       {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", "ja", | 419       {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", | 
| 491        default_format_type, net::UnescapeRule::NORMAL, | 420        default_format_type, net::UnescapeRule::NORMAL, | 
| 492        L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, | 421        L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, | 
| 493 | 422 | 
| 494       {"Japanese IDN with en", "http://xn--l8jvb1ey91xtjb.jp", "en", | 423       {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", | 
| 495        default_format_type, net::UnescapeRule::NORMAL, |  | 
| 496        L"http://xn--l8jvb1ey91xtjb.jp/", 7}, |  | 
| 497 |  | 
| 498       {"Japanese IDN without any languages", "http://xn--l8jvb1ey91xtjb.jp", "", |  | 
| 499        default_format_type, net::UnescapeRule::NORMAL, |  | 
| 500        // Single script is safe for empty languages. |  | 
| 501        L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, |  | 
| 502 |  | 
| 503       {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", |  | 
| 504        default_format_type, net::UnescapeRule::NORMAL, | 424        default_format_type, net::UnescapeRule::NORMAL, | 
| 505        // GURL doesn't assume an email address's domain part as a host name. | 425        // GURL doesn't assume an email address's domain part as a host name. | 
| 506        L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, | 426        L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, | 
| 507 | 427 | 
| 508       {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", | 428       {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", | 
| 509        "ja", default_format_type, net::UnescapeRule::NORMAL, | 429        default_format_type, net::UnescapeRule::NORMAL, | 
| 510        L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, | 430        L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, | 
| 511 | 431 | 
| 512       {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", | 432       {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", | 
| 513        default_format_type, net::UnescapeRule::NORMAL, | 433        default_format_type, net::UnescapeRule::NORMAL, | 
| 514        L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, | 434        L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, | 
| 515 | 435 | 
| 516       // -------- omit_username_password flag tests -------- | 436       // -------- omit_username_password flag tests -------- | 
| 517       {"With username and password, omit_username_password=false", | 437       {"With username and password, omit_username_password=false", | 
| 518        "http://user:passwd@example.com/foo", "", kFormatUrlOmitNothing, | 438        "http://user:passwd@example.com/foo", kFormatUrlOmitNothing, | 
| 519        net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, | 439        net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, | 
| 520 | 440 | 
| 521       {"With username and password, omit_username_password=true", | 441       {"With username and password, omit_username_password=true", | 
| 522        "http://user:passwd@example.com/foo", "", default_format_type, | 442        "http://user:passwd@example.com/foo", default_format_type, | 
| 523        net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, | 443        net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, | 
| 524 | 444 | 
| 525       {"With username and no password", "http://user@example.com/foo", "", | 445       {"With username and no password", "http://user@example.com/foo", | 
| 526        default_format_type, net::UnescapeRule::NORMAL, | 446        default_format_type, net::UnescapeRule::NORMAL, | 
| 527        L"http://example.com/foo", 7}, | 447        L"http://example.com/foo", 7}, | 
| 528 | 448 | 
| 529       {"Just '@' without username and password", "http://@example.com/foo", "", | 449       {"Just '@' without username and password", "http://@example.com/foo", | 
| 530        default_format_type, net::UnescapeRule::NORMAL, | 450        default_format_type, net::UnescapeRule::NORMAL, | 
| 531        L"http://example.com/foo", 7}, | 451        L"http://example.com/foo", 7}, | 
| 532 | 452 | 
| 533       // GURL doesn't think local-part of an email address is username for URL. | 453       // GURL doesn't think local-part of an email address is username for URL. | 
| 534       {"mailto:, omit_username_password=true", "mailto:foo@example.com", "", | 454       {"mailto:, omit_username_password=true", "mailto:foo@example.com", | 
| 535        default_format_type, net::UnescapeRule::NORMAL, | 455        default_format_type, net::UnescapeRule::NORMAL, | 
| 536        L"mailto:foo@example.com", 7}, | 456        L"mailto:foo@example.com", 7}, | 
| 537 | 457 | 
| 538       // -------- unescape flag tests -------- | 458       // -------- unescape flag tests -------- | 
| 539       {"Do not unescape", | 459       {"Do not unescape", | 
| 540        "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | 460        "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | 
| 541        "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 461        "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 
| 542        "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 462        "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 
| 543        "en", default_format_type, net::UnescapeRule::NONE, | 463        default_format_type, net::UnescapeRule::NONE, | 
| 544        // GURL parses %-encoded hostnames into Punycode. | 464        // GURL parses %-encoded hostnames into Punycode. | 
| 545        L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 465        L"http://\x30B0\x30FC\x30B0\x30EB.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 
| 546        L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 466        L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 
| 547        7}, | 467        7}, | 
| 548 | 468 | 
| 549       {"Unescape normally", | 469       {"Unescape normally", | 
| 550        "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | 470        "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | 
| 551        "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 471        "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 
| 552        "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 472        "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 
| 553        "en", default_format_type, net::UnescapeRule::NORMAL, | 473        default_format_type, net::UnescapeRule::NORMAL, | 
| 554        L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" | 474        L"http://\x30B0\x30FC\x30B0\x30EB.jp/\x30B0\x30FC\x30B0\x30EB" | 
| 555        L"?q=\x30B0\x30FC\x30B0\x30EB", | 475        L"?q=\x30B0\x30FC\x30B0\x30EB", | 
| 556        7}, | 476        7}, | 
| 557 | 477 | 
| 558       {"Unescape normally with BiDi control character", | 478       {"Unescape normally with BiDi control character", | 
| 559        "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", | 479        "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", | 
| 560        default_format_type, net::UnescapeRule::NORMAL, | 480        default_format_type, net::UnescapeRule::NORMAL, | 
| 561        L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, | 481        L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, | 
| 562 | 482 | 
| 563       {"Unescape normally including unescape spaces", | 483       {"Unescape normally including unescape spaces", | 
| 564        "http://www.google.com/search?q=Hello%20World", "en", | 484        "http://www.google.com/search?q=Hello%20World", | 
| 565        default_format_type, net::UnescapeRule::SPACES, | 485        default_format_type, net::UnescapeRule::SPACES, | 
| 566        L"http://www.google.com/search?q=Hello World", 7}, | 486        L"http://www.google.com/search?q=Hello World", 7}, | 
| 567 | 487 | 
| 568       /* | 488       /* | 
| 569       {"unescape=true with some special characters", | 489       {"unescape=true with some special characters", | 
| 570       "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", | 490       "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", | 
| 571       kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 491       kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 
| 572       L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, | 492       L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, | 
| 573       */ | 493       */ | 
| 574       // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". | 494       // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". | 
| 575 | 495 | 
| 576       // -------- omit http: -------- | 496       // -------- omit http: -------- | 
| 577       {"omit http with user name", "http://user@example.com/foo", "", | 497       {"omit http with user name", "http://user@example.com/foo", | 
| 578        kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0}, | 498        kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0}, | 
| 579 | 499 | 
| 580       {"omit http", "http://www.google.com/", "en", kFormatUrlOmitHTTP, | 500       {"omit http", "http://www.google.com/", kFormatUrlOmitHTTP, | 
| 581        net::UnescapeRule::NORMAL, L"www.google.com/", 0}, | 501        net::UnescapeRule::NORMAL, L"www.google.com/", 0}, | 
| 582 | 502 | 
| 583       {"omit http with https", "https://www.google.com/", "en", | 503       {"omit http with https", "https://www.google.com/", | 
| 584        kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, | 504        kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, | 
| 585        L"https://www.google.com/", 8}, | 505        L"https://www.google.com/", 8}, | 
| 586 | 506 | 
| 587       {"omit http starts with ftp.", "http://ftp.google.com/", "en", | 507       {"omit http starts with ftp.", "http://ftp.google.com/", | 
| 588        kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", | 508        kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", | 
| 589        7}, | 509        7}, | 
| 590 | 510 | 
| 591       // -------- omit trailing slash on bare hostname -------- | 511       // -------- omit trailing slash on bare hostname -------- | 
| 592       {"omit slash when it's the entire path", "http://www.google.com/", "en", | 512       {"omit slash when it's the entire path", "http://www.google.com/", | 
| 593        kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 513        kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 
| 594        L"http://www.google.com", 7}, | 514        L"http://www.google.com", 7}, | 
| 595       {"omit slash when there's a ref", "http://www.google.com/#ref", "en", | 515       {"omit slash when there's a ref", "http://www.google.com/#ref", | 
| 596        kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 516        kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 
| 597        L"http://www.google.com/#ref", 7}, | 517        L"http://www.google.com/#ref", 7}, | 
| 598       {"omit slash when there's a query", "http://www.google.com/?", "en", | 518       {"omit slash when there's a query", "http://www.google.com/?", | 
| 599        kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 519        kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 
| 600        L"http://www.google.com/?", 7}, | 520        L"http://www.google.com/?", 7}, | 
| 601       {"omit slash when it's not the entire path", "http://www.google.com/foo", | 521       {"omit slash when it's not the entire path", "http://www.google.com/foo", | 
| 602        "en", kFormatUrlOmitTrailingSlashOnBareHostname, | 522        kFormatUrlOmitTrailingSlashOnBareHostname, | 
| 603        net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7}, | 523        net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7}, | 
| 604       {"omit slash for nonstandard URLs", "data:/", "en", | 524       {"omit slash for nonstandard URLs", "data:/", | 
| 605        kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 525        kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 
| 606        L"data:/", 5}, | 526        L"data:/", 5}, | 
| 607       {"omit slash for file URLs", "file:///", "en", | 527       {"omit slash for file URLs", "file:///", | 
| 608        kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 528        kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 
| 609        L"file:///", 7}, | 529        L"file:///", 7}, | 
| 610 | 530 | 
| 611       // -------- view-source: -------- | 531       // -------- view-source: -------- | 
| 612       {"view-source", "view-source:http://xn--qcka1pmc.jp/", "ja", | 532       {"view-source", "view-source:http://xn--qcka1pmc.jp/", | 
| 613        default_format_type, net::UnescapeRule::NORMAL, | 533        default_format_type, net::UnescapeRule::NORMAL, | 
| 614        L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, | 534        L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, | 
| 615 | 535 | 
| 616       {"view-source of view-source", | 536       {"view-source of view-source", | 
| 617        "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", | 537        "view-source:view-source:http://xn--qcka1pmc.jp/", | 
| 618        default_format_type, net::UnescapeRule::NORMAL, | 538        default_format_type, net::UnescapeRule::NORMAL, | 
| 619        L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, | 539        L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, | 
| 620 | 540 | 
| 621       // view-source should omit http and trailing slash where non-view-source | 541       // view-source should omit http and trailing slash where non-view-source | 
| 622       // would. | 542       // would. | 
| 623       {"view-source omit http", "view-source:http://a.b/c", "en", | 543       {"view-source omit http", "view-source:http://a.b/c", | 
| 624        kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12}, | 544        kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12}, | 
| 625       {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", | 545       {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", | 
| 626        "en", kFormatUrlOmitAll, net::UnescapeRule::NORMAL, | 546        kFormatUrlOmitAll, net::UnescapeRule::NORMAL, | 
| 627        L"view-source:http://ftp.b/c", 19}, | 547        L"view-source:http://ftp.b/c", 19}, | 
| 628       {"view-source omit slash when it's the entire path", | 548       {"view-source omit slash when it's the entire path", | 
| 629        "view-source:http://a.b/", "en", kFormatUrlOmitAll, | 549        "view-source:http://a.b/", kFormatUrlOmitAll, | 
| 630        net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, | 550        net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, | 
| 631   }; | 551   }; | 
| 632 | 552 | 
| 633   for (size_t i = 0; i < arraysize(tests); ++i) { | 553   for (size_t i = 0; i < arraysize(tests); ++i) { | 
| 634     size_t prefix_len; | 554     size_t prefix_len; | 
| 635     base::string16 formatted = FormatUrl( | 555     base::string16 formatted = FormatUrl( | 
| 636         GURL(tests[i].input), tests[i].languages, tests[i].format_types, | 556         GURL(tests[i].input), std::string(), tests[i].format_types, | 
| 637         tests[i].escape_rules, NULL, &prefix_len, NULL); | 557         tests[i].escape_rules, NULL,  &prefix_len, NULL); | 
| 638     EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; | 558     EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; | 
| 639     EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; | 559     EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; | 
| 640   } | 560   } | 
| 641 } | 561 } | 
| 642 | 562 | 
| 643 TEST(UrlFormatterTest, FormatUrlParsed) { | 563 TEST(UrlFormatterTest, FormatUrlParsed) { | 
| 644   // No unescape case. | 564   // No unescape case. | 
| 645   url::Parsed parsed; | 565   url::Parsed parsed; | 
| 646   base::string16 formatted = | 566   base::string16 formatted = | 
| 647       FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" | 567       FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" | 
| 648                      "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), | 568                      "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), | 
| 649                 "ja", kFormatUrlOmitNothing, net::UnescapeRule::NONE, &parsed, | 569                 std::string(), kFormatUrlOmitNothing, net::UnescapeRule::NONE, | 
| 650                 NULL, NULL); | 570                 &parsed, NULL, NULL); | 
| 651   EXPECT_EQ(WideToUTF16( | 571   EXPECT_EQ(WideToUTF16( | 
| 652       L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" | 572       L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" | 
| 653       L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); | 573       L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); | 
| 654   EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), | 574   EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), | 
| 655       formatted.substr(parsed.username.begin, parsed.username.len)); | 575       formatted.substr(parsed.username.begin, parsed.username.len)); | 
| 656   EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), | 576   EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), | 
| 657       formatted.substr(parsed.password.begin, parsed.password.len)); | 577       formatted.substr(parsed.password.begin, parsed.password.len)); | 
| 658   EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), | 578   EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), | 
| 659       formatted.substr(parsed.host.begin, parsed.host.len)); | 579       formatted.substr(parsed.host.begin, parsed.host.len)); | 
| 660   EXPECT_EQ(WideToUTF16(L"8080"), | 580   EXPECT_EQ(WideToUTF16(L"8080"), | 
| (...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 855     if (test_char && | 775     if (test_char && | 
| 856         strchr(kUnescapedCharacters, static_cast<char>(test_char))) { | 776         strchr(kUnescapedCharacters, static_cast<char>(test_char))) { | 
| 857       EXPECT_NE(url.spec(), GURL(formatted).spec()); | 777       EXPECT_NE(url.spec(), GURL(formatted).spec()); | 
| 858     } else { | 778     } else { | 
| 859       EXPECT_EQ(url.spec(), GURL(formatted).spec()); | 779       EXPECT_EQ(url.spec(), GURL(formatted).spec()); | 
| 860     } | 780     } | 
| 861   } | 781   } | 
| 862 } | 782 } | 
| 863 | 783 | 
| 864 TEST(UrlFormatterTest, FormatUrlWithOffsets) { | 784 TEST(UrlFormatterTest, FormatUrlWithOffsets) { | 
| 865   CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, | 785   CheckAdjustedOffsets(std::string(),  kFormatUrlOmitNothing, | 
| 866                        net::UnescapeRule::NORMAL, NULL); | 786                        net::UnescapeRule::NORMAL, NULL); | 
| 867 | 787 | 
| 868   const size_t basic_offsets[] = { | 788   const size_t basic_offsets[] = { | 
| 869     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 789     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 
| 870     21, 22, 23, 24, 25 | 790     21, 22, 23, 24, 25 | 
| 871   }; | 791   }; | 
| 872   CheckAdjustedOffsets("http://www.google.com/foo/", "en", | 792   CheckAdjustedOffsets("http://www.google.com/foo/", | 
| 873                        kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 793                        kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 
| 874                        basic_offsets); | 794                        basic_offsets); | 
| 875 | 795 | 
| 876   const size_t omit_auth_offsets_1[] = { | 796   const size_t omit_auth_offsets_1[] = { | 
| 877     0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, | 797     0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, | 
| 878     8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | 798     8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | 
| 879   }; | 799   }; | 
| 880   CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", | 800   CheckAdjustedOffsets("http://foo:bar@www.google.com/", | 
| 881                        kFormatUrlOmitUsernamePassword, | 801                        kFormatUrlOmitUsernamePassword, | 
| 882                        net::UnescapeRule::NORMAL, omit_auth_offsets_1); | 802                        net::UnescapeRule::NORMAL, omit_auth_offsets_1); | 
| 883 | 803 | 
| 884   const size_t omit_auth_offsets_2[] = { | 804   const size_t omit_auth_offsets_2[] = { | 
| 885     0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, | 805     0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, | 
| 886     15, 16, 17, 18, 19, 20, 21 | 806     15, 16, 17, 18, 19, 20, 21 | 
| 887   }; | 807   }; | 
| 888   CheckAdjustedOffsets("http://foo@www.google.com/", "en", | 808   CheckAdjustedOffsets("http://foo@www.google.com/", | 
| 889                        kFormatUrlOmitUsernamePassword, | 809                        kFormatUrlOmitUsernamePassword, | 
| 890                        net::UnescapeRule::NORMAL, omit_auth_offsets_2); | 810                        net::UnescapeRule::NORMAL, omit_auth_offsets_2); | 
| 891 | 811 | 
| 892   const size_t dont_omit_auth_offsets[] = { | 812   const size_t dont_omit_auth_offsets[] = { | 
| 893     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 813     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 
| 894     kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 814     kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 
| 895     kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, | 815     kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, | 
| 896     30, 31 | 816     30, 31 | 
| 897   }; | 817   }; | 
| 898   // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". | 818   // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". | 
| 899   CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", | 819   CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", | 
| 900                        kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 820                        kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 
| 901                        dont_omit_auth_offsets); | 821                        dont_omit_auth_offsets); | 
| 902 | 822 | 
| 903   const size_t view_source_offsets[] = { | 823   const size_t view_source_offsets[] = { | 
| 904     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, | 824     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, | 
| 905     kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 | 825     kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 | 
| 906   }; | 826   }; | 
| 907   CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", | 827   CheckAdjustedOffsets("view-source:http://foo@www.google.com/", | 
| 908                        kFormatUrlOmitUsernamePassword, | 828                        kFormatUrlOmitUsernamePassword, | 
| 909                        net::UnescapeRule::NORMAL, view_source_offsets); | 829                        net::UnescapeRule::NORMAL, view_source_offsets); | 
| 910 | 830 | 
| 911   const size_t idn_hostname_offsets_1[] = { | 831   const size_t idn_hostname_offsets_1[] = { | 
| 912     0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 832     0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 
| 913     kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, | 833     kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, | 
| 914     13, 14, 15, 16, 17, 18, 19 | 834     13, 14, 15, 16, 17, 18, 19 | 
| 915   }; | 835   }; | 
| 916   // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". | 836   // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". | 
| 917   CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", | 837   CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", | 
| 918                        kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 838                        kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 
| 919                        idn_hostname_offsets_1); | 839                        idn_hostname_offsets_1); | 
| 920 | 840 | 
| 921   const size_t idn_hostname_offsets_2[] = { | 841   const size_t idn_hostname_offsets_2[] = { | 
| 922     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, | 842     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, | 
| 923     kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, | 843     kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, | 
| 924     kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 844     kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 
| 925     kNpos, 19, 20, 21, 22, 23, 24 | 845     kNpos, 19, 20, 21, 22, 23, 24 | 
| 926   }; | 846   }; | 
| 927   // Convert punycode to | 847   // Convert punycode to | 
| 928   // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". | 848   // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". | 
| 929   CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", | 849   CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", | 
| 930                        "zh-CN", kFormatUrlOmitNothing, | 850                        kFormatUrlOmitNothing, | 
| 931                        net::UnescapeRule::NORMAL, idn_hostname_offsets_2); | 851                        net::UnescapeRule::NORMAL, idn_hostname_offsets_2); | 
| 932 | 852 | 
| 933   const size_t unescape_offsets[] = { | 853   const size_t unescape_offsets[] = { | 
| 934     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 854     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 
| 935     21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, | 855     21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, | 
| 936     kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, | 856     kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, | 
| 937     kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 857     kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 
| 938     kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos | 858     kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos | 
| 939   }; | 859   }; | 
| 940   // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". | 860   // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". | 
| 941   CheckAdjustedOffsets( | 861   CheckAdjustedOffsets( | 
| 942       "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 862       "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 
| 943       "en", kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); | 863       kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); | 
| 944 | 864 | 
| 945   const size_t ref_offsets[] = { | 865   const size_t ref_offsets[] = { | 
| 946     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 866     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 
| 947     21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, | 867     21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, | 
| 948     33 | 868     33 | 
| 949   }; | 869   }; | 
| 950   // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". | 870   // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". | 
| 951   CheckAdjustedOffsets( | 871   CheckAdjustedOffsets( | 
| 952       "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", | 872       "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", | 
| 953       kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets); | 873       kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets); | 
| 954 | 874 | 
| 955   const size_t omit_http_offsets[] = { | 875   const size_t omit_http_offsets[] = { | 
| 956     0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, | 876     0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, | 
| 957     10, 11, 12, 13, 14 | 877     10, 11, 12, 13, 14 | 
| 958   }; | 878   }; | 
| 959   CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, | 879   CheckAdjustedOffsets("http://www.google.com/", kFormatUrlOmitHTTP, | 
| 960                        net::UnescapeRule::NORMAL, omit_http_offsets); | 880                        net::UnescapeRule::NORMAL, omit_http_offsets); | 
| 961 | 881 | 
| 962   const size_t omit_http_start_with_ftp_offsets[] = { | 882   const size_t omit_http_start_with_ftp_offsets[] = { | 
| 963     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | 883     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | 
| 964   }; | 884   }; | 
| 965   CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, | 885   CheckAdjustedOffsets("http://ftp.google.com/", kFormatUrlOmitHTTP, | 
| 966                        net::UnescapeRule::NORMAL, | 886                        net::UnescapeRule::NORMAL, | 
| 967                        omit_http_start_with_ftp_offsets); | 887                        omit_http_start_with_ftp_offsets); | 
| 968 | 888 | 
| 969   const size_t omit_all_offsets[] = { | 889   const size_t omit_all_offsets[] = { | 
| 970     0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | 890     0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | 
| 971     0, 1, 2, 3, 4, 5, 6, 7 | 891     0, 1, 2, 3, 4, 5, 6, 7 | 
| 972   }; | 892   }; | 
| 973   CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, | 893   CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, | 
| 974                        net::UnescapeRule::NORMAL, omit_all_offsets); | 894                        net::UnescapeRule::NORMAL, omit_all_offsets); | 
| 975 } | 895 } | 
| 976 | 896 | 
| 977 }  // namespace | 897 }  // namespace | 
| 978 | 898 | 
| 979 }  // namespace url_formatter | 899 }  // namespace url_formatter | 
| OLD | NEW | 
|---|