OLD | NEW |
1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 30 matching lines...) |
41 // Some implementations of base/basictypes.h may define ARRAYSIZE. | 41 // Some implementations of base/basictypes.h may define ARRAYSIZE. |
42 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro | 42 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro |
43 // which is in our version of basictypes.h. | 43 // which is in our version of basictypes.h. |
44 #ifndef ARRAYSIZE | 44 #ifndef ARRAYSIZE |
45 #define ARRAYSIZE ARRAYSIZE_UNSAFE | 45 #define ARRAYSIZE ARRAYSIZE_UNSAFE |
46 #endif | 46 #endif |
47 | 47 |
48 using url_test_utils::WStringToUTF16; | 48 using url_test_utils::WStringToUTF16; |
49 using url_test_utils::ConvertUTF8ToUTF16; | 49 using url_test_utils::ConvertUTF8ToUTF16; |
50 using url_test_utils::ConvertUTF16ToUTF8; | 50 using url_test_utils::ConvertUTF16ToUTF8; |
| 51 using url_canon::CanonHostInfo; |
51 | 52 |
52 namespace { | 53 namespace { |
53 | 54 |
54 struct ComponentCase { | 55 struct ComponentCase { |
55 const char* input; | 56 const char* input; |
56 const char* expected; | 57 const char* expected; |
57 url_parse::Component expected_component; | 58 url_parse::Component expected_component; |
58 bool expected_success; | 59 bool expected_success; |
59 }; | 60 }; |
60 | 61 |
61 // ComponentCase but with dual 8-bit/16-bit input. Generally, the unit tests | 62 // ComponentCase but with dual 8-bit/16-bit input. Generally, the unit tests |
62 // treat each input as optional, and will only try processing if non-NULL. | 63 // treat each input as optional, and will only try processing if non-NULL. |
63 // The output is always 8-bit. | 64 // The output is always 8-bit. |
64 struct DualComponentCase { | 65 struct DualComponentCase { |
65 const char* input8; | 66 const char* input8; |
66 const wchar_t* input16; | 67 const wchar_t* input16; |
67 const char* expected; | 68 const char* expected; |
68 url_parse::Component expected_component; | 69 url_parse::Component expected_component; |
69 bool expected_success; | 70 bool expected_success; |
70 }; | 71 }; |
71 | 72 |
| 73 // Test cases for CanonicalizeIPAddress(). The inputs are identical to |
| 74 // DualComponentCase, but the output has extra CanonHostInfo fields. |
| 75 struct IPAddressCase { |
| 76 const char* input8; |
| 77 const wchar_t* input16; |
| 78 const char* expected; |
| 79 url_parse::Component expected_component; |
| 80 |
| 81 // CanonHostInfo fields, for verbose output. |
| 82 CanonHostInfo::Family expected_family; |
| 83 int expected_num_ipv4_components; |
| 84 }; |
| 85 |
72 struct ReplaceCase { | 86 struct ReplaceCase { |
73 const char* base; | 87 const char* base; |
74 const char* scheme; | 88 const char* scheme; |
75 const char* username; | 89 const char* username; |
76 const char* password; | 90 const char* password; |
77 const char* host; | 91 const char* host; |
78 const char* port; | 92 const char* port; |
79 const char* path; | 93 const char* path; |
80 const char* query; | 94 const char* query; |
81 const char* ref; | 95 const char* ref; |
(...skipping 238 matching lines...) |
320 EXPECT_TRUE(url_canon::CanonicalizeScheme("", url_parse::Component(0, -1), | 334 EXPECT_TRUE(url_canon::CanonicalizeScheme("", url_parse::Component(0, -1), |
321 &output, &out_comp)); | 335 &output, &out_comp)); |
322 output.Complete(); | 336 output.Complete(); |
323 | 337 |
324 EXPECT_EQ(std::string(":"), out_str); | 338 EXPECT_EQ(std::string(":"), out_str); |
325 EXPECT_EQ(0, out_comp.begin); | 339 EXPECT_EQ(0, out_comp.begin); |
326 EXPECT_EQ(0, out_comp.len); | 340 EXPECT_EQ(0, out_comp.len); |
327 } | 341 } |
328 | 342 |
329 TEST(URLCanonTest, Host) { | 343 TEST(URLCanonTest, Host) { |
330 DualComponentCase host_cases[] = { | 344 IPAddressCase host_cases[] = { |
331 // Basic canonicalization, uppercase should be converted to lowercase. | 345 // Basic canonicalization, uppercase should be converted to lowercase. |
332 {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", url_parse::Component(0, 10), tru
e}, | 346 {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", url_parse::Component(0, 10), Can
onHostInfo::NEUTRAL, -1}, |
333 // Spaces and some other characters should be escaped. | 347 // Spaces and some other characters should be escaped. |
334 {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", url_
parse::Component(0, 22), true}, | 348 {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", url_
parse::Component(0, 22), CanonHostInfo::NEUTRAL, -1}, |
335 // Exciting different types of spaces! | 349 // Exciting different types of spaces! |
336 {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", url_parse::Component(0
, 16), true}, | 350 {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", url_parse::Component(0
, 16), CanonHostInfo::NEUTRAL, -1}, |
337 // Other types of space (no-break, zero-width, zero-width-no-break) are | 351 // Other types of space (no-break, zero-width, zero-width-no-break) are |
338 // name-prepped away to nothing. | 352 // name-prepped away to nothing. |
339 {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", url_parse::Component(0
, 10), true}, | 353 {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", url_parse::Component(0
, 10), CanonHostInfo::NEUTRAL, -1}, |
340 // Ideographic full stop (full-width period for Chinese, etc.) should be | 354 // Ideographic full stop (full-width period for Chinese, etc.) should be |
341 // treated as a dot. | 355 // treated as a dot. |
342 {NULL, L"www.foo\x3002"L"bar.com", "www.foo.bar.com", url_parse::Component(0
, 15), true}, | 356 {NULL, L"www.foo\x3002"L"bar.com", "www.foo.bar.com", url_parse::Component(0
, 15), CanonHostInfo::NEUTRAL, -1}, |
343 // Invalid unicode characters should fail... | 357 // Invalid unicode characters should fail... |
344 // ...In wide input, ICU will barf and we'll end up with the input as | 358 // ...In wide input, ICU will barf and we'll end up with the input as |
345 // escaped UTF-8 (the invalid character should be replaced with the | 359 // escaped UTF-8 (the invalid character should be replaced with the |
346 // replacement character). | 360 // replacement character). |
347 {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", url_parse::Com
ponent(0, 16), false}, | 361 {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", url_parse::Com
ponent(0, 16), CanonHostInfo::BROKEN, -1}, |
348 // ...This is the same as the previous case, but percent-escaped. | 362 // ...This is the same as the previous case, but percent-escaped. |
349 {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", url_parse::Com
ponent(0, 16), false}, | 363 {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", url_parse::Com
ponent(0, 16), CanonHostInfo::BROKEN, -1}, |
350 // Test name prepping, fullwidth input should be converted to ASCII and NOT | 364 // Test name prepping, fullwidth input should be converted to ASCII and NOT |
351 // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16. | 365 // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16. |
352 {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", url_parse::C
omponent(0, 6), true}, | 366 {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", url_parse::C
omponent(0, 6), CanonHostInfo::NEUTRAL, -1}, |
353 // Test that fullwidth escaped values are properly name-prepped, | 367 // Test that fullwidth escaped values are properly name-prepped, |
354 // then converted or rejected. | 368 // then converted or rejected. |
355 // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input) | 369 // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input) |
356 {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.c
om", url_parse::Component(0, 5), true}, | 370 {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.c
om", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1}, |
357 {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.c
om", url_parse::Component(0, 5), true}, | 371 {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.c
om", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1}, |
358 // ...%00 in fullwidth should fail (also as escaped UTF-8 input) | 372 // ...%00 in fullwidth should fail (also as escaped UTF-8 input) |
359 {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00
.com", url_parse::Component(0, 7), false}, | 373 {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00
.com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1}, |
360 {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00
.com", url_parse::Component(0, 7), false}, | 374 {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00
.com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1}, |
361 // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN | 375 // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN |
362 {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x5
97d", "xn--6qqa088eba", url_parse::Component(0, 14), true}, | 376 {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x5
97d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1}
, |
363 // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped | 377 // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped |
364 // UTF-8 (wide case). The output should be equivalent to the true wide | 378 // UTF-8 (wide case). The output should be equivalent to the true wide |
365 // character input above). | 379 // character input above). |
366 {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd", L"%E4%BD%A0%E5%A5%BD\x4f60\x5
97d", "xn--6qqa088eba", url_parse::Component(0, 14), true}, | 380 {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd", L"%E4%BD%A0%E5%A5%BD\x4f60\x5
97d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1}
, |
367 // Invalid escaped characters should fail and the percents should be | 381 // Invalid escaped characters should fail and the percents should be |
368 // escaped. | 382 // escaped. |
369 {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", url_parse::Component(0, 10), false}, | 383 {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", url_parse::Component(0, 10), CanonHo
stInfo::BROKEN, -1}, |
370 // An invalid character that has been escaped should still fail. | 384 // An invalid character that has been escaped should still fail. |
371 {"%25", L"%25", "%25", url_parse::Component(0, 3), false}, | 385 {"%25", L"%25", "%25", url_parse::Component(0, 3), CanonHostInfo::BROKEN, -1
}, |
372 {"hello%00", L"hello%00", "hello%00", url_parse::Component(0, 8), false}, | 386 {"hello%00", L"hello%00", "hello%00", url_parse::Component(0, 8), CanonHostI
nfo::BROKEN, -1}, |
373 // Escaped numbers should be treated like IP addresses if they are. | 387 // Escaped numbers should be treated like IP addresses if they are. |
374 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", "192.1
68.0.1", url_parse::Component(0, 11), true}, | 388 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", "192.1
68.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3}, |
375 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e",
"192.168.0.1", url_parse::Component(0, 11), true}, | 389 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e",
"192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3}, |
376 // Invalid escaping should trigger the regular host error handling. | 390 // Invalid escaping should trigger the regular host error handling. |
377 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01",
"%253gxc0.0250..01", url_parse::Component(0, 17), false}, | 391 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01",
"%253gxc0.0250..01", url_parse::Component(0, 17), CanonHostInfo::BROKEN, -1}, |
378 // Something that isn't exactly an IP should get treated as a host and | 392 // Something that isn't exactly an IP should get treated as a host and |
379 // spaces escaped. | 393 // spaces escaped. |
380 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", url_parse
::Component(0, 19), true}, | 394 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", url_parse
::Component(0, 19), CanonHostInfo::NEUTRAL, -1}, |
381 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP. | 395 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP. |
382 // These are "0Xc0.0250.01" in fullwidth. | 396 // These are "0Xc0.0250.01" in fullwidth. |
383 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x
92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\x
ff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", url_
parse::Component(0, 11), true}, | 397 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x
92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\x
ff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", url_
parse::Component(0, 11), CanonHostInfo::IPV4, 3}, |
| 398 // Broken IP addresses get marked as such. |
| 399 {"192.168.0.257", L"192.168.0.257", "192.168.0.257", url_parse::Component(0,
13), CanonHostInfo::BROKEN, -1}, |
| 400 {"[google.com]", L"[google.com]", "[google.com]", url_parse::Component(0, 12
), CanonHostInfo::BROKEN, -1}, |
384 }; | 401 }; |
385 | 402 |
| 403 // CanonicalizeHost() non-verbose. |
386 std::string out_str; | 404 std::string out_str; |
387 for (size_t i = 0; i < arraysize(host_cases); i++) { | 405 for (size_t i = 0; i < arraysize(host_cases); i++) { |
388 // Narrow version. | 406 // Narrow version. |
389 if (host_cases[i].input8) { | 407 if (host_cases[i].input8) { |
390 int host_len = static_cast<int>(strlen(host_cases[i].input8)); | 408 int host_len = static_cast<int>(strlen(host_cases[i].input8)); |
391 url_parse::Component in_comp(0, host_len); | 409 url_parse::Component in_comp(0, host_len); |
392 url_parse::Component out_comp; | 410 url_parse::Component out_comp; |
393 | 411 |
394 out_str.clear(); | 412 out_str.clear(); |
395 url_canon::StdStringCanonOutput output(&out_str); | 413 url_canon::StdStringCanonOutput output(&out_str); |
396 | 414 |
397 bool success = url_canon::CanonicalizeHost(host_cases[i].input8, in_comp, | 415 bool success = url_canon::CanonicalizeHost(host_cases[i].input8, in_comp, |
398 &output, &out_comp); | 416 &output, &out_comp); |
399 output.Complete(); | 417 output.Complete(); |
400 | 418 |
401 EXPECT_EQ(host_cases[i].expected_success, success); | 419 EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN, |
| 420 success); |
402 EXPECT_EQ(std::string(host_cases[i].expected), out_str); | 421 EXPECT_EQ(std::string(host_cases[i].expected), out_str); |
403 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin); | 422 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin); |
404 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len); | 423 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len); |
405 } | 424 } |
406 | 425 |
407 // Wide version. | 426 // Wide version. |
408 if (host_cases[i].input16) { | 427 if (host_cases[i].input16) { |
409 string16 input16(WStringToUTF16(host_cases[i].input16)); | 428 string16 input16(WStringToUTF16(host_cases[i].input16)); |
410 int host_len = static_cast<int>(input16.length()); | 429 int host_len = static_cast<int>(input16.length()); |
411 url_parse::Component in_comp(0, host_len); | 430 url_parse::Component in_comp(0, host_len); |
412 url_parse::Component out_comp; | 431 url_parse::Component out_comp; |
413 | 432 |
414 out_str.clear(); | 433 out_str.clear(); |
415 url_canon::StdStringCanonOutput output(&out_str); | 434 url_canon::StdStringCanonOutput output(&out_str); |
416 | 435 |
417 bool success = url_canon::CanonicalizeHost(input16.c_str(), in_comp, | 436 bool success = url_canon::CanonicalizeHost(input16.c_str(), in_comp, |
418 &output, &out_comp); | 437 &output, &out_comp); |
419 output.Complete(); | 438 output.Complete(); |
420 | 439 |
421 EXPECT_EQ(host_cases[i].expected_success, success); | 440 EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN, |
| 441 success); |
422 EXPECT_EQ(std::string(host_cases[i].expected), out_str); | 442 EXPECT_EQ(std::string(host_cases[i].expected), out_str); |
423 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin); | 443 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin); |
424 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len); | 444 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len); |
425 } | 445 } |
426 } | 446 } |
| 447 |
| 448 // CanonicalizeHostVerbose() |
| 449 for (size_t i = 0; i < arraysize(host_cases); i++) { |
| 450 // Narrow version. |
| 451 if (host_cases[i].input8) { |
| 452 int host_len = static_cast<int>(strlen(host_cases[i].input8)); |
| 453 url_parse::Component in_comp(0, host_len); |
| 454 |
| 455 out_str.clear(); |
| 456 url_canon::StdStringCanonOutput output(&out_str); |
| 457 CanonHostInfo host_info; |
| 458 |
| 459 url_canon::CanonicalizeHostVerbose(host_cases[i].input8, in_comp, |
| 460 &output, &host_info); |
| 461 output.Complete(); |
| 462 |
| 463 EXPECT_EQ(host_cases[i].expected_family, host_info.family); |
| 464 EXPECT_EQ(std::string(host_cases[i].expected), out_str); |
| 465 EXPECT_EQ(host_cases[i].expected_component.begin, |
| 466 host_info.out_host.begin); |
| 467 EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len); |
| 468 if (host_cases[i].expected_family == CanonHostInfo::IPV4) { |
| 469 EXPECT_EQ(host_cases[i].expected_num_ipv4_components, |
| 470 host_info.num_ipv4_components); |
| 471 } |
| 472 } |
| 473 |
| 474 // Wide version. |
| 475 if (host_cases[i].input16) { |
| 476 string16 input16(WStringToUTF16(host_cases[i].input16)); |
| 477 int host_len = static_cast<int>(input16.length()); |
| 478 url_parse::Component in_comp(0, host_len); |
| 479 |
| 480 out_str.clear(); |
| 481 url_canon::StdStringCanonOutput output(&out_str); |
| 482 CanonHostInfo host_info; |
| 483 |
| 484 url_canon::CanonicalizeHostVerbose(input16.c_str(), in_comp, |
| 485 &output, &host_info); |
| 486 output.Complete(); |
| 487 |
| 488 EXPECT_EQ(host_cases[i].expected_family, host_info.family); |
| 489 EXPECT_EQ(std::string(host_cases[i].expected), out_str); |
| 490 EXPECT_EQ(host_cases[i].expected_component.begin, |
| 491 host_info.out_host.begin); |
| 492 EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len); |
| 493 if (host_cases[i].expected_family == CanonHostInfo::IPV4) { |
| 494 EXPECT_EQ(host_cases[i].expected_num_ipv4_components, |
| 495 host_info.num_ipv4_components); |
| 496 } |
| 497 } |
| 498 } |
427 } | 499 } |
428 | 500 |
429 TEST(URLCanonTest, IPv4) { | 501 TEST(URLCanonTest, IPv4) { |
430 DualComponentCase cases[] = { | 502 IPAddressCase cases[] = { |
431 // Empty is not an IP address. | 503 // Empty is not an IP address. |
432 {"", L"", "", url_parse::Component(), false}, | 504 {"", L"", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1}, |
433 {".", L".", "", url_parse::Component(), false}, | 505 {".", L".", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1}, |
434 // Regular IP addresses in different bases. | 506 // Regular IP addresses in different bases. |
435 {"192.168.0.1", L"192.168.0.1", "192.168.0.1", url_parse::Component(0, 11),
true}, | 507 {"192.168.0.1", L"192.168.0.1", "192.168.0.1", url_parse::Component(0, 11),
CanonHostInfo::IPV4, 4}, |
436 {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", url_parse::Component(
0, 11), true}, | 508 {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", url_parse::Component(
0, 11), CanonHostInfo::IPV4, 4}, |
437 {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", url_parse::Compon
ent(0, 11), true}, | 509 {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", url_parse::Compon
ent(0, 11), CanonHostInfo::IPV4, 4}, |
438 // Non-IP addresses due to invalid characters. | 510 // Non-IP addresses due to invalid characters. |
439 {"192.168.9.com", L"192.168.9.com", "", url_parse::Component(), false}, | 511 {"192.168.9.com", L"192.168.9.com", "", url_parse::Component(), CanonHostInf
o::NEUTRAL, -1}, |
440 // Invalid characters for the base should be rejected. | 512 // Invalid characters for the base should be rejected. |
441 {"19a.168.0.1", L"19a.168.0.1", "", url_parse::Component(), false}, | 513 {"19a.168.0.1", L"19a.168.0.1", "", url_parse::Component(), CanonHostInfo::N
EUTRAL, -1}, |
442 {"0308.0250.00.01", L"0308.0250.00.01", "", url_parse::Component(), false}, | 514 {"0308.0250.00.01", L"0308.0250.00.01", "", url_parse::Component(), CanonHos
tInfo::NEUTRAL, -1}, |
443 {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", url_parse::Component(), fals
e}, | 515 {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", url_parse::Component(), Cano
nHostInfo::NEUTRAL, -1}, |
444 // If there are not enough components, the last one should fill them out. | 516 // If there are not enough components, the last one should fill them out. |
445 {"192", L"192", "0.0.0.192", url_parse::Component(0, 9), true}, | 517 {"192", L"192", "0.0.0.192", url_parse::Component(0, 9), CanonHostInfo::IPV4
, 1}, |
446 {"0xC0a80001", L"0xC0a80001", "192.168.0.1", url_parse::Component(0, 11), tr
ue}, | 518 {"0xC0a80001", L"0xC0a80001", "192.168.0.1", url_parse::Component(0, 11), Ca
nonHostInfo::IPV4, 1}, |
447 {"030052000001", L"030052000001", "192.168.0.1", url_parse::Component(0, 11)
, true}, | 519 {"030052000001", L"030052000001", "192.168.0.1", url_parse::Component(0, 11)
, CanonHostInfo::IPV4, 1}, |
448 {"000030052000001", L"000030052000001", "192.168.0.1", url_parse::Component(
0, 11), true}, | 520 {"000030052000001", L"000030052000001", "192.168.0.1", url_parse::Component(
0, 11), CanonHostInfo::IPV4, 1}, |
449 {"192.168", L"192.168", "192.0.0.168", url_parse::Component(0, 11), true}, | 521 {"192.168", L"192.168", "192.0.0.168", url_parse::Component(0, 11), CanonHos
tInfo::IPV4, 2}, |
450 {"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", url_parse::Component(0
, 11), true}, | 522 {"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", url_parse::Component(0
, 11), CanonHostInfo::IPV4, 2}, |
451 {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", url_parse::Component(0,
11), true}, | 523 {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", url_parse::Component(0,
11), CanonHostInfo::IPV4, 2}, |
452 {"192.168.1", L"192.168.1", "192.168.0.1", url_parse::Component(0, 11), true
}, | 524 {"192.168.1", L"192.168.1", "192.168.0.1", url_parse::Component(0, 11), Cano
nHostInfo::IPV4, 3}, |
453 // Too many components means not an IP address. | 525 // Too many components means not an IP address. |
454 {"192.168.0.0.1", L"192.168.0.0.1", "", url_parse::Component(), false}, | 526 {"192.168.0.0.1", L"192.168.0.0.1", "", url_parse::Component(), CanonHostInf
o::NEUTRAL, -1}, |
455 // We allow a single trailing dot. | 527 // We allow a single trailing dot. |
456 {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", url_parse::Component(0, 11)
, true}, | 528 {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", url_parse::Component(0, 11)
, CanonHostInfo::IPV4, 4}, |
457 {"192.168.0.1. hello", L"192.168.0.1. hello", "", url_parse::Component(), fa
lse}, | 529 {"192.168.0.1. hello", L"192.168.0.1. hello", "", url_parse::Component(), Ca
nonHostInfo::NEUTRAL, -1}, |
458 {"192.168.0.1..", L"192.168.0.1..", "", url_parse::Component(), false}, | 530 {"192.168.0.1..", L"192.168.0.1..", "", url_parse::Component(), CanonHostInf
o::NEUTRAL, -1}, |
459 // Two dots in a row means not an IP address. | 531 // Two dots in a row means not an IP address. |
460 {"192.168..1", L"192.168..1", "", url_parse::Component(), false}, | 532 {"192.168..1", L"192.168..1", "", url_parse::Component(), CanonHostInfo::NEU
TRAL, -1}, |
461 // Any non-first components get truncated to one byte. | 533 // Any numerical overflow should be marked as BROKEN. |
462 {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "20.0.162.255", url_pars
e::Component(0, 12), true}, | 534 {"0x100.0", L"0x100.0", "", url_parse::Component(), CanonHostInfo::BROKEN, -
1}, |
463 // The last component should get truncated to however much space is | 535 {"0x100.0.0", L"0x100.0.0", "", url_parse::Component(), CanonHostInfo::BROKE
N, -1}, |
464 // remaining. | 536 {"0x100.0.0.0", L"0x100.0.0.0", "", url_parse::Component(), CanonHostInfo::B
ROKEN, -1}, |
465 {"192.168.0.257", L"192.168.0.257", "192.168.0.1", url_parse::Component(0, 1
1), true}, | 537 {"0.0x100.0.0", L"0.0x100.0.0", "", url_parse::Component(), CanonHostInfo::B
ROKEN, -1}, |
466 {"192.168.0xa20001", L"192.168.0xa20001", "192.168.0.1", url_parse::Componen
t(0, 11), true}, | 538 {"0.0.0x100.0", L"0.0.0x100.0", "", url_parse::Component(), CanonHostInfo::B
ROKEN, -1}, |
467 {"192.015052000001", L"192.015052000001", "192.168.0.1", url_parse::Componen
t(0, 11), true}, | 539 {"0.0.0.0x100", L"0.0.0.0x100", "", url_parse::Component(), CanonHostInfo::B
ROKEN, -1}, |
468 {"0X12C0a80001", L"0X12C0a80001", "192.168.0.1", url_parse::Component(0, 11)
, true}, | 540 {"0.0.0x10000", L"0.0.0x10000", "", url_parse::Component(), CanonHostInfo::B
ROKEN, -1}, |
| 541 {"0.0x1000000", L"0.0x1000000", "", url_parse::Component(), CanonHostInfo::B
ROKEN, -1}, |
| 542 {"0x100000000", L"0x100000000", "", url_parse::Component(), CanonHostInfo::B
ROKEN, -1}, |
| 543 // Repeat the previous tests, minus 1, to verify boundaries. |
| 544 {"0xFF.0", L"0xFF.0", "255.0.0.0", url_parse::Component(0, 9), CanonHostInfo
::IPV4, 2}, |
| 545 {"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", url_parse::Component(0, 9), CanonHost
Info::IPV4, 3}, |
| 546 {"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", url_parse::Component(0, 9), Canon
HostInfo::IPV4, 4}, |
| 547 {"0.0xFF.0.0", L"0.0xFF.0.0", "0.255.0.0", url_parse::Component(0, 9), Canon
HostInfo::IPV4, 4}, |
| 548 {"0.0.0xFF.0", L"0.0.0xFF.0", "0.0.255.0", url_parse::Component(0, 9), Canon
HostInfo::IPV4, 4}, |
| 549 {"0.0.0.0xFF", L"0.0.0.0xFF", "0.0.0.255", url_parse::Component(0, 9), Canon
HostInfo::IPV4, 4}, |
| 550 {"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", url_parse::Component(0, 11), Ca
nonHostInfo::IPV4, 3}, |
| 551 {"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", url_parse::Component(0, 13),
CanonHostInfo::IPV4, 2}, |
| 552 {"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", url_parse::Component(0, 15)
, CanonHostInfo::IPV4, 1}, |
| 553 // Old truncation tests. They're all "BROKEN" now. |
| 554 {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", url_parse::Component
(), CanonHostInfo::BROKEN, -1}, |
| 555 {"192.168.0.257", L"192.168.0.257", "", url_parse::Component(), CanonHostInf
o::BROKEN, -1}, |
| 556 {"192.168.0xa20001", L"192.168.0xa20001", "", url_parse::Component(), CanonH
ostInfo::BROKEN, -1}, |
| 557 {"192.015052000001", L"192.015052000001", "", url_parse::Component(), CanonH
ostInfo::BROKEN, -1}, |
| 558 {"0X12C0a80001", L"0X12C0a80001", "", url_parse::Component(), CanonHostInfo:
:BROKEN, -1}, |
| 559 {"276.1.2", L"276.1.2", "", url_parse::Component(), CanonHostInfo::BROKEN, -
1}, |
469 // Spaces should be rejected. | 560 // Spaces should be rejected. |
470 {"192.168.0.1 hello", L"192.168.0.1 hello", "", url_parse::Component(), fals
e}, | 561 {"192.168.0.1 hello", L"192.168.0.1 hello", "", url_parse::Component(), Cano
nHostInfo::NEUTRAL, -1}, |
471 // Truncation plus the last component missing. | 562 // Very large numbers. |
472 {"276.1.2", L"276.1.2", "20.1.0.2", url_parse::Component(0, 8), true}, | 563 {"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300
.0x00000000000000fF.00000000000000001", "192.255.0.1", url_parse::Component(0, 1
1), CanonHostInfo::IPV4, 3}, |
473 // Very large numbers. We support up to 16 characters per component | 564 {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300.
0xffffffffFFFFFFFF.3022415481470977", "", url_parse::Component(0, 11), CanonHost
Info::BROKEN, -1}, |
474 // before rejecting. | 565 // A number has no length limit, but long numbers can still overflow. |
475 {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300.
0xffffffffFFFFFFFF.3022415481470977", "192.255.0.1", url_parse::Component(0, 11)
, true}, | 566 {"00000000000000000001", L"00000000000000000001", "0.0.0.1", url_parse::Comp
onent(0, 7), CanonHostInfo::IPV4, 1}, |
476 {"000000000000000300.168.1", L"000000000000000300.168.1", "", url_parse::Com
ponent(), false}, | 567 {"0000000000000000100000000000000001", L"0000000000000000100000000000000001"
, "", url_parse::Component(), CanonHostInfo::BROKEN, -1}, |
| 568 // If a long component is non-numeric, it's a hostname, *not* a broken IP. |
| 569 {"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", url_parse::C
omponent(), CanonHostInfo::NEUTRAL, -1}, |
| 570 {"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", url_parse::C
omponent(), CanonHostInfo::NEUTRAL, -1}, |
| 571 // Truncation of all zeros should still result in 0. |
| 572 {"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", url_parse::Component(0, 7), Canon
HostInfo::IPV4, 4}, |
477 }; | 573 }; |
478 | 574 |
479 for (size_t i = 0; i < arraysize(cases); i++) { | 575 for (size_t i = 0; i < arraysize(cases); i++) { |
480 // 8-bit version. | 576 // 8-bit version. |
481 url_parse::Component component(0, | 577 url_parse::Component component(0, |
482 static_cast<int>(strlen(cases[i].input8))); | 578 static_cast<int>(strlen(cases[i].input8))); |
483 | 579 |
484 std::string out_str1; | 580 std::string out_str1; |
485 url_canon::StdStringCanonOutput output1(&out_str1); | 581 url_canon::StdStringCanonOutput output1(&out_str1); |
486 url_parse::Component out_ip; | 582 url_canon::CanonHostInfo host_info; |
487 bool success = url_canon::CanonicalizeIPAddress(cases[i].input8, component, | 583 url_canon::CanonicalizeIPAddress(cases[i].input8, component, &output1, |
488 &output1, &out_ip); | 584 &host_info); |
489 output1.Complete(); | 585 output1.Complete(); |
490 | 586 |
491 EXPECT_EQ(cases[i].expected_success, success); | 587 EXPECT_EQ(cases[i].expected_family, host_info.family); |
492 if (success) { | 588 if (host_info.family == CanonHostInfo::IPV4) { |
493 EXPECT_STREQ(cases[i].expected, out_str1.c_str()); | 589 EXPECT_STREQ(cases[i].expected, out_str1.c_str()); |
494 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin); | 590 EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin); |
495 EXPECT_EQ(cases[i].expected_component.len, out_ip.len); | 591 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); |
| 592 EXPECT_EQ(cases[i].expected_num_ipv4_components, |
| 593 host_info.num_ipv4_components); |
496 } | 594 } |
497 | 595 |
498 // 16-bit version. | 596 // 16-bit version. |
499 string16 input16(WStringToUTF16(cases[i].input16)); | 597 string16 input16(WStringToUTF16(cases[i].input16)); |
500 component = url_parse::Component(0, static_cast<int>(input16.length())); | 598 component = url_parse::Component(0, static_cast<int>(input16.length())); |
501 | 599 |
502 std::string out_str2; | 600 std::string out_str2; |
503 url_canon::StdStringCanonOutput output2(&out_str2); | 601 url_canon::StdStringCanonOutput output2(&out_str2); |
504 success = url_canon::CanonicalizeIPAddress(input16.c_str(), component, | 602 url_canon::CanonicalizeIPAddress(input16.c_str(), component, &output2, |
505 &output2, &out_ip); | 603 &host_info); |
506 output2.Complete(); | 604 output2.Complete(); |
507 | 605 |
508 EXPECT_EQ(cases[i].expected_success, success); | 606 EXPECT_EQ(cases[i].expected_family, host_info.family); |
509 if (success) { | 607 if (host_info.family == CanonHostInfo::IPV4) { |
510 EXPECT_STREQ(cases[i].expected, out_str1.c_str()); | 608 EXPECT_STREQ(cases[i].expected, out_str2.c_str()); |
511 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin); | 609 EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin); |
512 EXPECT_EQ(cases[i].expected_component.len, out_ip.len); | 610 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); |
| 611 EXPECT_EQ(cases[i].expected_num_ipv4_components, |
| 612 host_info.num_ipv4_components); |
513 } | 613 } |
514 } | 614 } |
515 } | 615 } |
516 | 616 |
517 TEST(URLCanonTest, IPv6) { | 617 TEST(URLCanonTest, IPv6) { |
518 DualComponentCase cases[] = { | 618 IPAddressCase cases[] = { |
519 // Empty is not an IP address. | 619 // Empty is not an IP address. |
520 {"", L"", "", url_parse::Component(), false}, | 620 {"", L"", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1}, |
521 {":", L":", "", url_parse::Component(), false}, | 621 // Non-IPs with [:] characters are marked BROKEN. |
522 {"[", L"[", "", url_parse::Component(), false}, | 622 {":", L":", "", url_parse::Component(), CanonHostInfo::BROKEN, -1}, |
523 {"[:", L"[:", "", url_parse::Component(), false}, | 623 {"[", L"[", "", url_parse::Component(), CanonHostInfo::BROKEN, -1}, |
524 {"]", L"]", "", url_parse::Component(), false}, | 624 {"[:", L"[:", "", url_parse::Component(), CanonHostInfo::BROKEN, -1}, |
525 {":]", L":]", "", url_parse::Component(), false}, | 625 {"]", L"]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1}, |
526 {"[]", L"[]", "", url_parse::Component(), false}, | 626 {":]", L":]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1}, |
527 {"[:]", L"[:]", "", url_parse::Component(), false}, | 627 {"[]", L"[]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1}, |
| 628 {"[:]", L"[:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1}, |
528 // Regular IP address is invalid without bounding '[' and ']'. | 629 // Regular IP address is invalid without bounding '[' and ']'. |
529 {"2001:db8::1", L"2001:db8::1", "", url_parse::Component(), false}, | 630 {"2001:db8::1", L"2001:db8::1", "", url_parse::Component(), CanonHostInfo::B
ROKEN, -1}, |
530 {"[2001:db8::1", L"[2001:db8::1", "", url_parse::Component(), false}, | 631 {"[2001:db8::1", L"[2001:db8::1", "", url_parse::Component(), CanonHostInfo:
:BROKEN, -1}, |
531 {"2001:db8::1]", L"2001:db8::1]", "", url_parse::Component(), false}, | 632 {"2001:db8::1]", L"2001:db8::1]", "", url_parse::Component(), CanonHostInfo:
:BROKEN, -1}, |
532 // Regular IP addresses. | 633 // Regular IP addresses. |
533 {"[::]", L"[::]", "[::]", url_parse::Component(0,4), true}, | 634 {"[::]", L"[::]", "[::]", url_parse::Component(0,4), CanonHostInfo::IPV6, -1
}, |
534 {"[::1]", L"[::1]", "[::1]", url_parse::Component(0,5), true}, | 635 {"[::1]", L"[::1]", "[::1]", url_parse::Component(0,5), CanonHostInfo::IPV6,
-1}, |
535 {"[1::]", L"[1::]", "[1::]", url_parse::Component(0,5), true}, | 636 {"[1::]", L"[1::]", "[1::]", url_parse::Component(0,5), CanonHostInfo::IPV6,
-1}, |
536 {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", url_parse::Component(0
,10), true}, | 637 {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", url_parse::Component(0
,10), CanonHostInfo::IPV6, -1}, |
537 {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", url_par
se::Component(0,15), true}, | 638 {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", url_par
se::Component(0,15), CanonHostInfo::IPV6, -1}, |
538 | 639 |
539 // Leading zeros should be stripped. | 640 // Leading zeros should be stripped. |
540 {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4:
5:6:7]", url_parse::Component(0,17), true}, | 641 {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4:
5:6:7]", url_parse::Component(0,17), CanonHostInfo::IPV6, -1}, |
541 | 642 |
542 // Upper case letters should be lowercased. | 643 // Upper case letters should be lowercased. |
543 {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", ur
l_parse::Component(0,20), true}, | 644 {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", ur
l_parse::Component(0,20), CanonHostInfo::IPV6, -1}, |
544 | 645 |
545 // The same address can be written with different contractions, but should | 646 // The same address can be written with different contractions, but should |
546 // get canonicalized to the same thing. | 647 // get canonicalized to the same thing. |
547 {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", url_parse::Component
(0,14), true}, | 648 {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", url_parse::Component
(0,14), CanonHostInfo::IPV6, -1}, |
548 {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", url_parse::Component
(0,14), true}, | 649 {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", url_parse::Component
(0,14), CanonHostInfo::IPV6, -1}, |
549 | 650 |
550 // IPv4 addresses | 651 // IPv4 addresses |
551 // Only mapped and compat addresses can have IPv4 syntax embedded. | 652 // Only mapped and compat addresses can have IPv4 syntax embedded. |
552 {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "", url_parse::Component()
, false}, | 653 {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "", url_parse::Component()
, CanonHostInfo::BROKEN, -1}, |
553 {"[2001::192.168.0.1]", L"[2001::92.168.0.1]", "", url_parse::Component(), f
alse}, | 654 {"[2001::192.168.0.1]", L"[2001::192.168.0.1]", "", url_parse::Component(),
CanonHostInfo::BROKEN, -1}, |
554 {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", url_parse::Component
(), false}, | 655 {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", url_parse::Component
(), CanonHostInfo::BROKEN, -1}, |
555 | 656 |
556 // IPv4 with truncation and last component missing. | 657 // IPv4 with last component missing. |
557 {"[::ffff:276.1.2]", L"[::ffff:276.1.2]", "[::ffff:1401:2]", url_parse::Comp
onent(0,15), true}, | 658 {"[::ffff:192.1.2]", L"[::ffff:192.1.2]", "[::ffff:c001:2]", url_parse::Comp
onent(0,15), CanonHostInfo::IPV6, -1}, |
558 | 659 |
559 // IPv4 using hex. | 660 // IPv4 using hex. |
560 // TODO(eroman): Should this format be disallowed? | 661 // TODO(eroman): Should this format be disallowed? |
561 {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8:
1]", url_parse::Component(0,15), true}, | 662 {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8:
1]", url_parse::Component(0,15), CanonHostInfo::IPV6, -1}, |
562 | 663 |
563 // There may be zeros surrounding the "::" contraction. | 664 // There may be zeros surrounding the "::" contraction. |
564 {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", url_parse::Component(0,5), true}, | 665 {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", url_parse::Component(0,5), CanonH
ostInfo::IPV6, -1}, |
565 | 666 |
566 {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", url_parse::Component(0,
13), true}, | 667 {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", url_parse::Component(0,
13), CanonHostInfo::IPV6, -1}, |
567 | 668 |
568 // Can only have one "::" contraction in an IPv6 string literal. | 669 // Can only have one "::" contraction in an IPv6 string literal. |
569 {"[2001::db8::1]", L"[2001::db8::1]", "", url_parse::Component(), false}, | 670 {"[2001::db8::1]", L"[2001::db8::1]", "", url_parse::Component(), CanonHostI
nfo::BROKEN, -1}, |
570 // No more than 2 consecutive ':'s. | 671 // No more than 2 consecutive ':'s. |
571 {"[2001:db8:::1]", L"[2001:db8:::1]", "", url_parse::Component(), false}, | 672 {"[2001:db8:::1]", L"[2001:db8:::1]", "", url_parse::Component(), CanonHostI
nfo::BROKEN, -1}, |
572 {"[:::]", L"[:::]", "", url_parse::Component(), false}, | 673 {"[:::]", L"[:::]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1}, |
573 // Non-IP addresses due to invalid characters. | 674 // Non-IP addresses due to invalid characters. |
574 {"[2001::.com]", L"[2001::.com]", "", url_parse::Component(), false}, | 675 {"[2001::.com]", L"[2001::.com]", "", url_parse::Component(), CanonHostInfo:
:BROKEN, -1}, |
575 // If there are not enough components, the last one should fill them out. | 676 // If there are not enough components, the last one should fill them out. |
576 // ... omitted at this time ... | 677 // ... omitted at this time ... |
577 // Too many components means not an IP address. Similarly with too few if
using IPv4 compat or mapped addresses. | 678 // Too many components means not an IP address. Similarly with too few if
using IPv4 compat or mapped addresses. |
578 {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", url_parse::Component(), fals
e}, | 679 {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", url_parse::Component(), Cano
nHostInfo::BROKEN, -1}, |
579 {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", url_parse::Compone
nt(), false}, | 680 {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", url_parse::Compone
nt(), CanonHostInfo::BROKEN, -1}, |
580 {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", url_parse::Component(),
false}, | 681 {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", url_parse::Component(),
CanonHostInfo::BROKEN, -1}, |
581 // Too many bits (even though 8 comonents, the last one holds 32 bits). | 682 // Too many bits (even though 8 comonents, the last one holds 32 bits). |
582 {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", url_pars
e::Component(), false}, | 683 {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", url_pars
e::Component(), CanonHostInfo::BROKEN, -1}, |
583 | 684 |
584 // Too many bits specified -- the contraction would have to be zero-length | 685 // Too many bits specified -- the contraction would have to be zero-length |
585 // to not exceed 128 bits. | 686 // to not exceed 128 bits. |
586 {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", url_parse:
:Component(), false}, | 687 {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", url_parse:
:Component(), CanonHostInfo::BROKEN, -1}, |
587 | 688 |
588 // The contraction is for 16 bits of zero. | 689 // The contraction is for 16 bits of zero. |
589 {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", url_parse::Co
mponent(0,17), true}, | 690 {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", url_parse::Co
mponent(0,17), CanonHostInfo::IPV6, -1}, |
590 | 691 |
591 // Cannot have a trailing colon. | 692 // Cannot have a trailing colon. |
592 {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", url_parse::Component(), fa
lse}, | 693 {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", url_parse::Component(), Ca
nonHostInfo::BROKEN, -1}, |
593 {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", url_parse:
:Component(), false}, | 694 {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", url_parse:
:Component(), CanonHostInfo::BROKEN, -1}, |
594 | 695 |
595 // Cannot have negative numbers. | 696 // Cannot have negative numbers. |
596 {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", url_parse::Component(), fa
lse}, | 697 {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", url_parse::Component(), Ca
nonHostInfo::BROKEN, -1}, |
597 | 698 |
598 // Scope ID -- the URL may contain an optional ["%" <scope_id>] section. | 699 // Scope ID -- the URL may contain an optional ["%" <scope_id>] section. |
599 // The scope_id should be included in the canonicalized URL, and is an | 700 // The scope_id should be included in the canonicalized URL, and is an |
600 // unsigned decimal number. | 701 // unsigned decimal number. |
601 | 702 |
602 // Invalid because no ID was given after the percent. | 703 // Invalid because no ID was given after the percent. |
603 | 704 |
604 // Don't allow scope-id | 705 // Don't allow scope-id |
605 {"[1::%1]", L"[1::%1]", "", url_parse::Component(), false}, | 706 {"[1::%1]", L"[1::%1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -
1}, |
606 {"[1::%eth0]", L"[1::%eth0]", "", url_parse::Component(), false}, | 707 {"[1::%eth0]", L"[1::%eth0]", "", url_parse::Component(), CanonHostInfo::BRO
KEN, -1}, |
607 {"[1::%]", L"[1::%]", "", url_parse::Component(), false}, | 708 {"[1::%]", L"[1::%]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1}
, |
608 {"[%]", L"[%]", "", url_parse::Component(), false}, | 709 {"[%]", L"[%]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1}, |
609 {"[::%:]", L"[::%:]", "", url_parse::Component(), false}, | 710 {"[::%:]", L"[::%:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1}
, |
610 | 711 |
611 // Don't allow leading or trailing colons. | 712 // Don't allow leading or trailing colons. |
612 {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", url_parse::Component(), false}, | 713 {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", url_parse::Component(), CanonHostInf
o::BROKEN, -1}, |
613 {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", url_parse::Component(), false}, | 714 {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", url_parse::Component(), CanonHostInf
o::BROKEN, -1}, |
614 {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", url_parse::Component(), false}, | 715 {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", url_parse::Component(), CanonHostI
nfo::BROKEN, -1}, |
615 | 716 |
616 // We allow a single trailing dot. | 717 // We allow a single trailing dot. |
617 // ... omitted at this time ... | 718 // ... omitted at this time ... |
618 // Two dots in a row means not an IP address. | 719 // Two dots in a row means not an IP address. |
619 {"[::192.168..1]", L"[::192.168..1]", "", url_parse::Component(), false}, | 720 {"[::192.168..1]", L"[::192.168..1]", "", url_parse::Component(), CanonHostI
nfo::BROKEN, -1}, |
620 // Any non-first components get truncated to one byte. | 721 // Any non-first components get truncated to one byte. |
621 // ... omitted at this time ... | 722 // ... omitted at this time ... |
622 // Spaces should be rejected. | 723 // Spaces should be rejected. |
623 {"[::1 hello]", L"[::1 hello]", "", url_parse::Component(), false}, | 724 {"[::1 hello]", L"[::1 hello]", "", url_parse::Component(), CanonHostInfo::B
ROKEN, -1}, |
624 }; | 725 }; |
625 | 726 |
626 for (size_t i = 0; i < arraysize(cases); i++) { | 727 for (size_t i = 0; i < arraysize(cases); i++) { |
627 // Print some context of what test we were on, to help debug failures. | |
628 SCOPED_TRACE(cases[i].input8); | |
629 | |
630 // 8-bit version. | 728 // 8-bit version. |
631 url_parse::Component component(0, | 729 url_parse::Component component(0, |
632 static_cast<int>(strlen(cases[i].input8))); | 730 static_cast<int>(strlen(cases[i].input8))); |
633 | 731 |
634 std::string out_str1; | 732 std::string out_str1; |
635 url_canon::StdStringCanonOutput output1(&out_str1); | 733 url_canon::StdStringCanonOutput output1(&out_str1); |
636 url_parse::Component out_ip; | 734 url_canon::CanonHostInfo host_info; |
637 bool success = url_canon::CanonicalizeIPAddress(cases[i].input8, component, | 735 url_canon::CanonicalizeIPAddress(cases[i].input8, component, &output1, |
638 &output1, &out_ip); | 736 &host_info); |
639 output1.Complete(); | 737 output1.Complete(); |
640 | 738 |
641 EXPECT_EQ(cases[i].expected_success, success); | 739 EXPECT_EQ(cases[i].expected_family, host_info.family); |
642 if (success) { | 740 if (host_info.family == CanonHostInfo::IPV6) { |
643 EXPECT_STREQ(cases[i].expected, out_str1.c_str()); | 741 EXPECT_STREQ(cases[i].expected, out_str1.c_str()); |
644 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin); | 742 EXPECT_EQ(cases[i].expected_component.begin, |
645 EXPECT_EQ(cases[i].expected_component.len, out_ip.len); | 743 host_info.out_host.begin); |
| 744 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); |
646 } | 745 } |
647 | 746 |
648 // 16-bit version. | 747 // 16-bit version. |
649 string16 input16(WStringToUTF16(cases[i].input16)); | 748 string16 input16(WStringToUTF16(cases[i].input16)); |
650 component = url_parse::Component(0, static_cast<int>(input16.length())); | 749 component = url_parse::Component(0, static_cast<int>(input16.length())); |
651 | 750 |
652 std::string out_str2; | 751 std::string out_str2; |
653 url_canon::StdStringCanonOutput output2(&out_str2); | 752 url_canon::StdStringCanonOutput output2(&out_str2); |
654 success = url_canon::CanonicalizeIPAddress(input16.c_str(), component, | 753 url_canon::CanonicalizeIPAddress(input16.c_str(), component, &output2, |
655 &output2, &out_ip); | 754 &host_info); |
656 output2.Complete(); | 755 output2.Complete(); |
657 | 756 |
658 EXPECT_EQ(cases[i].expected_success, success); | 757 EXPECT_EQ(cases[i].expected_family, host_info.family); |
659 if (success) { | 758 if (host_info.family == CanonHostInfo::IPV6) { |
660 EXPECT_STREQ(cases[i].expected, out_str1.c_str()); | 759 EXPECT_STREQ(cases[i].expected, out_str2.c_str()); |
661 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin); | 760 EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin); |
662 EXPECT_EQ(cases[i].expected_component.len, out_ip.len); | 761 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); |
663 } | 762 } |
664 } | 763 } |
665 } | 764 } |
666 | 765 |
667 TEST(URLCanonTest, UserInfo) { | 766 TEST(URLCanonTest, UserInfo) { |
668 // Note that the canonicalizer should escape and treat empty components as | 767 // Note that the canonicalizer should escape and treat empty components as |
669 // not being there. | 768 // not being there. |
670 | 769 |
671 // We actually parse a full input URL so we can get the initial components. | 770 // We actually parse a full input URL so we can get the initial components. |
672 struct UserComponentCase { | 771 struct UserComponentCase { |
(...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1071 TEST(URLCanonTest, CanonicalizeStandardURL) { | 1170 TEST(URLCanonTest, CanonicalizeStandardURL) { |
1072 // The individual component canonicalize tests should have caught the cases | 1171 // The individual component canonicalize tests should have caught the cases |
1073 // for each of those components. Here, we just need to test that the various | 1172 // for each of those components. Here, we just need to test that the various |
1074 // parts are included or excluded properly, and have the correct separators. | 1173 // parts are included or excluded properly, and have the correct separators. |
1075 struct URLCase { | 1174 struct URLCase { |
1076 const char* input; | 1175 const char* input; |
1077 const char* expected; | 1176 const char* expected; |
1078 bool expected_success; | 1177 bool expected_success; |
1079 } cases[] = { | 1178 } cases[] = { |
1080 {"http://www.google.com/foo?bar=baz#", "http://www.google.com/foo?bar=baz#",
true}, | 1179 {"http://www.google.com/foo?bar=baz#", "http://www.google.com/foo?bar=baz#",
true}, |
| 1180 {"http://[www.google.com]/", "http://[www.google.com]/", false}, |
1081 {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", false
}, | 1181 {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", false
}, |
1082 {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", t
rue}, | 1182 {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", t
rue}, |
1083 {"www.google.com", ":www.google.com/", true}, | 1183 {"www.google.com", ":www.google.com/", true}, |
1084 {"http://192.0x00A80001", "http://192.168.0.1/", true}, | 1184 {"http://192.0x00A80001", "http://192.168.0.1/", true}, |
1085 {"http://www/foo%2Ehtml", "http://www/foo.html", true}, | 1185 {"http://www/foo%2Ehtml", "http://www/foo.html", true}, |
1086 | 1186 |
1087 // Backslashes should get converted to forward slashes. | 1187 // Backslashes should get converted to forward slashes. |
1088 {"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true}, | 1188 {"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true}, |
1089 | 1189 |
1090 // Busted refs shouldn't make the whole thing fail. | 1190 // Busted refs shouldn't make the whole thing fail. |
(...skipping 717 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1808 url_canon::StdStringCanonOutput repl_output(&repl_str); | 1908 url_canon::StdStringCanonOutput repl_output(&repl_str); |
1809 url_canon::ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed)
; | 1909 url_canon::ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed)
; |
1810 repl_output.Complete(); | 1910 repl_output.Complete(); |
1811 | 1911 |
1812 // Generate the expected string and check. | 1912 // Generate the expected string and check. |
1813 std::string expected("file:///foo?"); | 1913 std::string expected("file:///foo?"); |
1814 for (size_t i = 0; i < new_query.length(); i++) | 1914 for (size_t i = 0; i < new_query.length(); i++) |
1815 expected.push_back('a'); | 1915 expected.push_back('a'); |
1816 EXPECT_TRUE(expected == repl_str); | 1916 EXPECT_TRUE(expected == repl_str); |
1817 } | 1917 } |
OLD | NEW |