| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <errno.h> | 5 #include <errno.h> |
| 6 #include <stddef.h> | 6 #include <stddef.h> |
| 7 | 7 |
| 8 #include "base/macros.h" | 8 #include "base/macros.h" |
| 9 #include "base/strings/utf_string_conversions.h" | 9 #include "base/strings/utf_string_conversions.h" |
| 10 #include "testing/gtest/include/gtest/gtest.h" | 10 #include "testing/gtest/include/gtest/gtest.h" |
| (...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 287 | 287 |
| 288 EXPECT_EQ(std::string(":"), out_str); | 288 EXPECT_EQ(std::string(":"), out_str); |
| 289 EXPECT_EQ(0, out_comp.begin); | 289 EXPECT_EQ(0, out_comp.begin); |
| 290 EXPECT_EQ(0, out_comp.len); | 290 EXPECT_EQ(0, out_comp.len); |
| 291 } | 291 } |
| 292 | 292 |
| 293 TEST(URLCanonTest, Host) { | 293 TEST(URLCanonTest, Host) { |
| 294 IPAddressCase host_cases[] = { | 294 IPAddressCase host_cases[] = { |
| 295 // Basic canonicalization, uppercase should be converted to lowercase. | 295 // Basic canonicalization, uppercase should be converted to lowercase. |
| 296 {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10), CanonHostInfo:
:NEUTRAL, -1, ""}, | 296 {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10), CanonHostInfo:
:NEUTRAL, -1, ""}, |
| 297 // Spaces and some other characters should be escaped. | 297 // Spaces and some other characters should fail (used to be allowed). |
| 298 {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", Comp
onent(0, 22), CanonHostInfo::NEUTRAL, -1, ""}, | 298 {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", Comp
onent(0, 22), CanonHostInfo::BROKEN, -1, ""}, |
| 299 // Exciting different types of spaces! | 299 // Exciting different types of spaces! |
| 300 {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16), Cano
nHostInfo::NEUTRAL, -1, ""}, | 300 {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16), Cano
nHostInfo::BROKEN, -1, ""}, |
| 301 // Other types of space (no-break, zero-width, zero-width-no-break) are | 301 // Other types of space (no-break, zero-width, zero-width-no-break) are |
| 302 // name-prepped away to nothing. | 302 // name-prepped away to nothing. |
| 303 {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10), Cano
nHostInfo::NEUTRAL, -1, ""}, | 303 {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10), Cano
nHostInfo::NEUTRAL, -1, ""}, |
| 304 // Ideographic full stop (full-width period for Chinese, etc.) should be | 304 // Ideographic full stop (full-width period for Chinese, etc.) should be |
| 305 // treated as a dot. | 305 // treated as a dot. |
| 306 {NULL, L"www.foo\x3002" L"bar.com", "www.foo.bar.com", Component(0, 15), Can
onHostInfo::NEUTRAL, -1, ""}, | 306 {NULL, L"www.foo\x3002" L"bar.com", "www.foo.bar.com", Component(0, 15), Can
onHostInfo::NEUTRAL, -1, ""}, |
| 307 // Invalid unicode characters should fail... | 307 // Invalid unicode characters should fail... |
| 308 // ...In wide input, ICU will barf and we'll end up with the input as | 308 // ...In wide input, ICU will barf and we'll end up with the input as |
| 309 // escaped UTF-8 (the invalid character should be replaced with the | 309 // escaped UTF-8 (the invalid character should be replaced with the |
| 310 // replacement character). | 310 // replacement character). |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 354 // ZWNJ (U+200C) and ZWJ (U+200D) are mapped away in UTS 46 transitional | 354 // ZWNJ (U+200C) and ZWJ (U+200D) are mapped away in UTS 46 transitional |
| 355 // handling as well as in IDNA 2003. | 355 // handling as well as in IDNA 2003. |
| 356 {"a\xe2\x80\x8c" "b\xe2\x80\x8d" "c", L"a\x200c" L"b\x200d" L"c", "abc", | 356 {"a\xe2\x80\x8c" "b\xe2\x80\x8d" "c", L"a\x200c" L"b\x200d" L"c", "abc", |
| 357 Component(0, 3), CanonHostInfo::NEUTRAL, -1, ""}, | 357 Component(0, 3), CanonHostInfo::NEUTRAL, -1, ""}, |
| 358 // ZWJ between Devanagari characters is still mapped away in UTS 46 | 358 // ZWJ between Devanagari characters is still mapped away in UTS 46 |
| 359 // transitional handling. IDNA 2008 would give xn--11bo0mv54g. | 359 // transitional handling. IDNA 2008 would give xn--11bo0mv54g. |
| 360 {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c", | 360 {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c", |
| 361 L"\x915\x94d\x200d\x91c", "xn--11bo0m", | 361 L"\x915\x94d\x200d\x91c", "xn--11bo0m", |
| 362 Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""}, | 362 Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""}, |
| 363 // Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b) | 363 // Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b) |
| 364 // However, we do allow this at the moment because we don't use | 364 // Full-width ASCII is converted to ASCII. |
| 365 // STD3 rules and canonicalize full-width ASCII to ASCII. | |
| 366 {"wow\xef\xbc\x81", L"wow\xff01", "wow%21", | 365 {"wow\xef\xbc\x81", L"wow\xff01", "wow%21", |
| 367 Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""}, | 366 Component(0, 6), CanonHostInfo::BROKEN, -1, ""}, |
| 368 // U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c) | 367 // U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c) |
| 369 // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2 | 368 // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2 |
| 370 {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo", | 369 {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo", |
| 371 Component(0, 11), CanonHostInfo::BROKEN, -1, ""}, | 370 Component(0, 11), CanonHostInfo::BROKEN, -1, ""}, |
| 372 // U+2F868 (CJK Comp) is disallowed. UTS 46, table 4, row (d) | 371 // U+2F868 (CJK Comp) is disallowed. UTS 46, table 4, row (d) |
| 373 // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2 | 372 // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2 |
| 374 {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn", | 373 {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn", |
| 375 "%F0%AF%A1%A8%E5%A7%BB.cn", | 374 "%F0%AF%A1%A8%E5%A7%BB.cn", |
| 376 Component(0, 24), CanonHostInfo::BROKEN, -1, ""}, | 375 Component(0, 24), CanonHostInfo::BROKEN, -1, ""}, |
| 377 // Maps uppercase letters to lower case letters. UTS 46 table 4 row (e) | 376 // Maps uppercase letters to lower case letters. UTS 46 table 4 row (e) |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 471 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", | 470 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", |
| 472 "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, | 471 "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, |
| 473 "C0A80001"}, | 472 "C0A80001"}, |
| 474 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e", | 473 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e", |
| 475 "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, | 474 "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, |
| 476 "C0A80001"}, | 475 "C0A80001"}, |
| 477 // Invalid escaping should trigger the regular host error handling. | 476 // Invalid escaping should trigger the regular host error handling. |
| 478 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01",
"%253gxc0.0250..01", Component(0, 17), CanonHostInfo::BROKEN, -1, ""}, | 477 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01",
"%253gxc0.0250..01", Component(0, 17), CanonHostInfo::BROKEN, -1, ""}, |
| 479 // Something that isn't exactly an IP should get treated as a host and | 478 // Something that isn't exactly an IP should get treated as a host and |
| 480 // spaces escaped. | 479 // spaces escaped (the host is then marked BROKEN). |
| 481 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", Component
(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, | 480 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", Component
(0, 19), CanonHostInfo::BROKEN, -1, ""}, |
| 482 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP. | 481 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP. |
| 483 // These are "0Xc0.0250.01" in fullwidth. | 482 // These are "0Xc0.0250.01" in fullwidth. |
| 484 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x
92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\x
ff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", Comp
onent(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, | 483 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x
92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\x
ff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", Comp
onent(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, |
| 485 // Broken IP addresses get marked as such. | 484 // Broken IP addresses get marked as such. |
| 486 {"192.168.0.257", L"192.168.0.257", "192.168.0.257", Component(0, 13), Canon
HostInfo::BROKEN, -1, ""}, | 485 {"192.168.0.257", L"192.168.0.257", "192.168.0.257", Component(0, 13), Canon
HostInfo::BROKEN, -1, ""}, |
| 487 {"[google.com]", L"[google.com]", "[google.com]", Component(0, 12), CanonHos
tInfo::BROKEN, -1, ""}, | 486 {"[google.com]", L"[google.com]", "[google.com]", Component(0, 12), CanonHos
tInfo::BROKEN, -1, ""}, |
| 488 // Cyrillic letter followed by '(' should return punycode for '(' escaped | 487 // Cyrillic letter followed by '(' should return punycode for '(' escaped |
| 489 // before punycode string was created. I.e. | 488 // before punycode string was created. I.e. |
| 490 // if '(' is escaped after punycode is created we would get xn--%28-8tb | 489 // if '(' is escaped after punycode is created we would get xn--%28-8tb |
| 491 // (incorrect). | 490 // (incorrect). |
| 492 {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11), | 491 {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11), |
| 493 CanonHostInfo::NEUTRAL, -1, ""}, | 492 CanonHostInfo::BROKEN, -1, ""}, |
| 494 // Address with all hexadecimal characters with leading number of 1<<32 | 493 // Address with all hexadecimal characters with leading number of 1<<32 |
| 495 // or greater and should return NEUTRAL rather than BROKEN if not all | 494 // or greater and should return NEUTRAL rather than BROKEN if not all |
| 496 // components are numbers. | 495 // components are numbers. |
| 497 {"12345678912345.de", L"12345678912345.de", "12345678912345.de", Component(0
, 17), CanonHostInfo::NEUTRAL, -1, ""}, | 496 {"12345678912345.de", L"12345678912345.de", "12345678912345.de", Component(0
, 17), CanonHostInfo::NEUTRAL, -1, ""}, |
| 498 {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de", Compo
nent(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, | 497 {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de", Compo
nent(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, |
| 499 {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de", "1
2345678912345.12345678912345.de", Component(0, 32), CanonHostInfo::NEUTRAL, -1,
""}, | 498 {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de", "1
2345678912345.12345678912345.de", Component(0, 32), CanonHostInfo::NEUTRAL, -1,
""}, |
| 500 {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de", Co
mponent(0, 20), CanonHostInfo::NEUTRAL, -1, ""}, | 499 {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de", Co
mponent(0, 20), CanonHostInfo::NEUTRAL, -1, ""}, |
| 501 {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde", Compo
nent(0, 19), CanonHostInfo::BROKEN, -1, ""}, | 500 {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde", Compo
nent(0, 19), CanonHostInfo::BROKEN, -1, ""}, |
| 502 }; | 501 }; |
| 503 | 502 |
| (...skipping 1746 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2250 }; | 2249 }; |
| 2251 | 2250 |
| 2252 for (auto& test_case : cases) { | 2251 for (auto& test_case : cases) { |
| 2253 SCOPED_TRACE(test_case.scheme); | 2252 SCOPED_TRACE(test_case.scheme); |
| 2254 EXPECT_EQ(test_case.expected_port, | 2253 EXPECT_EQ(test_case.expected_port, |
| 2255 DefaultPortForScheme(test_case.scheme, strlen(test_case.scheme))); | 2254 DefaultPortForScheme(test_case.scheme, strlen(test_case.scheme))); |
| 2256 } | 2255 } |
| 2257 } | 2256 } |
| 2258 | 2257 |
| 2259 } // namespace url | 2258 } // namespace url |
| OLD | NEW |