Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/url_canon_unittest.cc

Issue 114050: url_canon: New CanonicalizeHostVerbose() function. (Closed) Base URL: http://google-url.googlecode.com/svn/trunk/
Patch Set: Address brettw's comments Created 11 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/url_canon_ip.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2007, Google Inc. 1 // Copyright 2007, Google Inc.
2 // All rights reserved. 2 // All rights reserved.
3 // 3 //
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are 5 // modification, are permitted provided that the following conditions are
6 // met: 6 // met:
7 // 7 //
8 // * Redistributions of source code must retain the above copyright 8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer. 9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above 10 // * Redistributions in binary form must reproduce the above
(...skipping 30 matching lines...) Expand all
41 // Some implementations of base/basictypes.h may define ARRAYSIZE. 41 // Some implementations of base/basictypes.h may define ARRAYSIZE.
42 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro 42 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro
43 // which is in our version of basictypes.h. 43 // which is in our version of basictypes.h.
44 #ifndef ARRAYSIZE 44 #ifndef ARRAYSIZE
45 #define ARRAYSIZE ARRAYSIZE_UNSAFE 45 #define ARRAYSIZE ARRAYSIZE_UNSAFE
46 #endif 46 #endif
47 47
48 using url_test_utils::WStringToUTF16; 48 using url_test_utils::WStringToUTF16;
49 using url_test_utils::ConvertUTF8ToUTF16; 49 using url_test_utils::ConvertUTF8ToUTF16;
50 using url_test_utils::ConvertUTF16ToUTF8; 50 using url_test_utils::ConvertUTF16ToUTF8;
51 using url_canon::CanonHostInfo;
51 52
52 namespace { 53 namespace {
53 54
54 struct ComponentCase { 55 struct ComponentCase {
55 const char* input; 56 const char* input;
56 const char* expected; 57 const char* expected;
57 url_parse::Component expected_component; 58 url_parse::Component expected_component;
58 bool expected_success; 59 bool expected_success;
59 }; 60 };
60 61
61 // ComponentCase but with dual 8-bit/16-bit input. Generally, the unit tests 62 // ComponentCase but with dual 8-bit/16-bit input. Generally, the unit tests
62 // treat each input as optional, and will only try processing if non-NULL. 63 // treat each input as optional, and will only try processing if non-NULL.
63 // The output is always 8-bit. 64 // The output is always 8-bit.
64 struct DualComponentCase { 65 struct DualComponentCase {
65 const char* input8; 66 const char* input8;
66 const wchar_t* input16; 67 const wchar_t* input16;
67 const char* expected; 68 const char* expected;
68 url_parse::Component expected_component; 69 url_parse::Component expected_component;
69 bool expected_success; 70 bool expected_success;
70 }; 71 };
71 72
73 // Test cases for CanonicalizeIPAddress(). The inputs are identical to
74 // DualComponentCase, but the output has extra CanonHostInfo fields.
75 struct IPAddressCase {
76 const char* input8;
77 const wchar_t* input16;
78 const char* expected;
79 url_parse::Component expected_component;
80
81 // CanonHostInfo fields, for verbose output.
82 CanonHostInfo::Family expected_family;
83 int expected_num_ipv4_components;
84 };
85
72 struct ReplaceCase { 86 struct ReplaceCase {
73 const char* base; 87 const char* base;
74 const char* scheme; 88 const char* scheme;
75 const char* username; 89 const char* username;
76 const char* password; 90 const char* password;
77 const char* host; 91 const char* host;
78 const char* port; 92 const char* port;
79 const char* path; 93 const char* path;
80 const char* query; 94 const char* query;
81 const char* ref; 95 const char* ref;
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after
320 EXPECT_TRUE(url_canon::CanonicalizeScheme("", url_parse::Component(0, -1), 334 EXPECT_TRUE(url_canon::CanonicalizeScheme("", url_parse::Component(0, -1),
321 &output, &out_comp)); 335 &output, &out_comp));
322 output.Complete(); 336 output.Complete();
323 337
324 EXPECT_EQ(std::string(":"), out_str); 338 EXPECT_EQ(std::string(":"), out_str);
325 EXPECT_EQ(0, out_comp.begin); 339 EXPECT_EQ(0, out_comp.begin);
326 EXPECT_EQ(0, out_comp.len); 340 EXPECT_EQ(0, out_comp.len);
327 } 341 }
328 342
329 TEST(URLCanonTest, Host) { 343 TEST(URLCanonTest, Host) {
330 DualComponentCase host_cases[] = { 344 IPAddressCase host_cases[] = {
331 // Basic canonicalization, uppercase should be converted to lowercase. 345 // Basic canonicalization, uppercase should be converted to lowercase.
332 {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", url_parse::Component(0, 10), tru e}, 346 {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", url_parse::Component(0, 10), Can onHostInfo::NEUTRAL, -1},
333 // Spaces and some other characters should be escaped. 347 // Spaces and some other characters should be escaped.
334 {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", url_ parse::Component(0, 22), true}, 348 {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", url_ parse::Component(0, 22), CanonHostInfo::NEUTRAL, -1},
335 // Exciting different types of spaces! 349 // Exciting different types of spaces!
336 {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", url_parse::Component(0 , 16), true}, 350 {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", url_parse::Component(0 , 16), CanonHostInfo::NEUTRAL, -1},
337 // Other types of space (no-break, zero-width, zero-width-no-break) are 351 // Other types of space (no-break, zero-width, zero-width-no-break) are
338 // name-prepped away to nothing. 352 // name-prepped away to nothing.
339 {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", url_parse::Component(0 , 10), true}, 353 {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", url_parse::Component(0 , 10), CanonHostInfo::NEUTRAL, -1},
340 // Ideographic full stop (full-width period for Chinese, etc.) should be 354 // Ideographic full stop (full-width period for Chinese, etc.) should be
341 // treated as a dot. 355 // treated as a dot.
342 {NULL, L"www.foo\x3002"L"bar.com", "www.foo.bar.com", url_parse::Component(0 , 15), true}, 356 {NULL, L"www.foo\x3002"L"bar.com", "www.foo.bar.com", url_parse::Component(0 , 15), CanonHostInfo::NEUTRAL, -1},
343 // Invalid unicode characters should fail... 357 // Invalid unicode characters should fail...
344 // ...In wide input, ICU will barf and we'll end up with the input as 358 // ...In wide input, ICU will barf and we'll end up with the input as
345 // escaped UTF-8 (the invalid character should be replaced with the 359 // escaped UTF-8 (the invalid character should be replaced with the
346 // replacement character). 360 // replacement character).
347 {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", url_parse::Com ponent(0, 16), false}, 361 {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", url_parse::Com ponent(0, 16), CanonHostInfo::BROKEN, -1},
348 // ...This is the same as previous but with the input escaped. 362 // ...This is the same as previous but with the input escaped.
349 {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", url_parse::Com ponent(0, 16), false}, 363 {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", url_parse::Com ponent(0, 16), CanonHostInfo::BROKEN, -1},
350 // Test name prepping, fullwidth input should be converted to ASCII and NOT 364 // Test name prepping, fullwidth input should be converted to ASCII and NOT
351 // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16. 365 // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.
352 {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", url_parse::C omponent(0, 6), true}, 366 {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", url_parse::C omponent(0, 6), CanonHostInfo::NEUTRAL, -1},
353 // Test that fullwidth escaped values are properly name-prepped, 367 // Test that fullwidth escaped values are properly name-prepped,
354 // then converted or rejected. 368 // then converted or rejected.
355 // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input) 369 // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)
356 {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.c om", url_parse::Component(0, 5), true}, 370 {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.c om", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1},
357 {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.c om", url_parse::Component(0, 5), true}, 371 {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.c om", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1},
358 // ...%00 in fullwidth should fail (also as escaped UTF-8 input) 372 // ...%00 in fullwidth should fail (also as escaped UTF-8 input)
359 {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00 .com", url_parse::Component(0, 7), false}, 373 {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00 .com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1},
360 {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00 .com", url_parse::Component(0, 7), false}, 374 {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00 .com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1},
361 // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN 375 // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN
362 {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x5 97d", "xn--6qqa088eba", url_parse::Component(0, 14), true}, 376 {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x5 97d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1} ,
363 // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped 377 // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
364 // UTF-8 (wide case). The output should be equivalent to the true wide 378 // UTF-8 (wide case). The output should be equivalent to the true wide
365 // character input above). 379 // character input above).
366 {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd", L"%E4%BD%A0%E5%A5%BD\x4f60\x5 97d", "xn--6qqa088eba", url_parse::Component(0, 14), true}, 380 {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd", L"%E4%BD%A0%E5%A5%BD\x4f60\x5 97d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1} ,
367 // Invalid escaped characters should fail and the percents should be 381 // Invalid escaped characters should fail and the percents should be
368 // escaped. 382 // escaped.
369 {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", url_parse::Component(0, 10), false}, 383 {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", url_parse::Component(0, 10), CanonHo stInfo::BROKEN, -1},
370 // If we get an invalid character that has been escaped. 384 // If we get an invalid character that has been escaped.
371 {"%25", L"%25", "%25", url_parse::Component(0, 3), false}, 385 {"%25", L"%25", "%25", url_parse::Component(0, 3), CanonHostInfo::BROKEN, -1 },
372 {"hello%00", L"hello%00", "hello%00", url_parse::Component(0, 8), false}, 386 {"hello%00", L"hello%00", "hello%00", url_parse::Component(0, 8), CanonHostI nfo::BROKEN, -1},
373 // Escaped numbers should be treated like IP addresses if they are. 387 // Escaped numbers should be treated like IP addresses if they are.
374 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", "192.1 68.0.1", url_parse::Component(0, 11), true}, 388 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", "192.1 68.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3},
375 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1", url_parse::Component(0, 11), true}, 389 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3},
376 // Invalid escaping should trigger the regular host error handling. 390 // Invalid escaping should trigger the regular host error handling.
377 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", url_parse::Component(0, 17), false}, 391 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", url_parse::Component(0, 17), CanonHostInfo::BROKEN, -1},
378 // Something that isn't exactly an IP should get treated as a host and 392 // Something that isn't exactly an IP should get treated as a host and
379 // spaces escaped. 393 // spaces escaped.
380 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", url_parse ::Component(0, 19), true}, 394 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", url_parse ::Component(0, 19), CanonHostInfo::NEUTRAL, -1},
381 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP. 395 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
382 // These are "0Xc0.0250.01" in fullwidth. 396 // These are "0Xc0.0250.01" in fullwidth.
383 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x 92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\x ff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", url_ parse::Component(0, 11), true}, 397 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x 92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\x ff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", url_ parse::Component(0, 11), CanonHostInfo::IPV4, 3},
398 // Broken IP addresses get marked as such.
399 {"192.168.0.257", L"192.168.0.257", "192.168.0.257", url_parse::Component(0, 13), CanonHostInfo::BROKEN, -1},
400 {"[google.com]", L"[google.com]", "[google.com]", url_parse::Component(0, 12 ), CanonHostInfo::BROKEN, -1},
384 }; 401 };
385 402
403 // CanonicalizeHost() non-verbose.
386 std::string out_str; 404 std::string out_str;
387 for (size_t i = 0; i < arraysize(host_cases); i++) { 405 for (size_t i = 0; i < arraysize(host_cases); i++) {
388 // Narrow version. 406 // Narrow version.
389 if (host_cases[i].input8) { 407 if (host_cases[i].input8) {
390 int host_len = static_cast<int>(strlen(host_cases[i].input8)); 408 int host_len = static_cast<int>(strlen(host_cases[i].input8));
391 url_parse::Component in_comp(0, host_len); 409 url_parse::Component in_comp(0, host_len);
392 url_parse::Component out_comp; 410 url_parse::Component out_comp;
393 411
394 out_str.clear(); 412 out_str.clear();
395 url_canon::StdStringCanonOutput output(&out_str); 413 url_canon::StdStringCanonOutput output(&out_str);
396 414
397 bool success = url_canon::CanonicalizeHost(host_cases[i].input8, in_comp, 415 bool success = url_canon::CanonicalizeHost(host_cases[i].input8, in_comp,
398 &output, &out_comp); 416 &output, &out_comp);
399 output.Complete(); 417 output.Complete();
400 418
401 EXPECT_EQ(host_cases[i].expected_success, success); 419 EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
420 success);
402 EXPECT_EQ(std::string(host_cases[i].expected), out_str); 421 EXPECT_EQ(std::string(host_cases[i].expected), out_str);
403 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin); 422 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);
404 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len); 423 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);
405 } 424 }
406 425
407 // Wide version. 426 // Wide version.
408 if (host_cases[i].input16) { 427 if (host_cases[i].input16) {
409 string16 input16(WStringToUTF16(host_cases[i].input16)); 428 string16 input16(WStringToUTF16(host_cases[i].input16));
410 int host_len = static_cast<int>(input16.length()); 429 int host_len = static_cast<int>(input16.length());
411 url_parse::Component in_comp(0, host_len); 430 url_parse::Component in_comp(0, host_len);
412 url_parse::Component out_comp; 431 url_parse::Component out_comp;
413 432
414 out_str.clear(); 433 out_str.clear();
415 url_canon::StdStringCanonOutput output(&out_str); 434 url_canon::StdStringCanonOutput output(&out_str);
416 435
417 bool success = url_canon::CanonicalizeHost(input16.c_str(), in_comp, 436 bool success = url_canon::CanonicalizeHost(input16.c_str(), in_comp,
418 &output, &out_comp); 437 &output, &out_comp);
419 output.Complete(); 438 output.Complete();
420 439
421 EXPECT_EQ(host_cases[i].expected_success, success); 440 EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
441 success);
422 EXPECT_EQ(std::string(host_cases[i].expected), out_str); 442 EXPECT_EQ(std::string(host_cases[i].expected), out_str);
423 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin); 443 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);
424 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len); 444 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);
425 } 445 }
426 } 446 }
447
448 // CanonicalizeHostVerbose()
449 for (size_t i = 0; i < arraysize(host_cases); i++) {
450 // Narrow version.
451 if (host_cases[i].input8) {
452 int host_len = static_cast<int>(strlen(host_cases[i].input8));
453 url_parse::Component in_comp(0, host_len);
454
455 out_str.clear();
456 url_canon::StdStringCanonOutput output(&out_str);
457 CanonHostInfo host_info;
458
459 url_canon::CanonicalizeHostVerbose(host_cases[i].input8, in_comp,
460 &output, &host_info);
461 output.Complete();
462
463 EXPECT_EQ(host_cases[i].expected_family, host_info.family);
464 EXPECT_EQ(std::string(host_cases[i].expected), out_str);
465 EXPECT_EQ(host_cases[i].expected_component.begin,
466 host_info.out_host.begin);
467 EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
468 if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
469 EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
470 host_info.num_ipv4_components);
471 }
472 }
473
474 // Wide version.
475 if (host_cases[i].input16) {
476 string16 input16(WStringToUTF16(host_cases[i].input16));
477 int host_len = static_cast<int>(input16.length());
478 url_parse::Component in_comp(0, host_len);
479
480 out_str.clear();
481 url_canon::StdStringCanonOutput output(&out_str);
482 CanonHostInfo host_info;
483
484 url_canon::CanonicalizeHostVerbose(input16.c_str(), in_comp,
485 &output, &host_info);
486 output.Complete();
487
488 EXPECT_EQ(host_cases[i].expected_family, host_info.family);
489 EXPECT_EQ(std::string(host_cases[i].expected), out_str);
490 EXPECT_EQ(host_cases[i].expected_component.begin,
491 host_info.out_host.begin);
492 EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
493 if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
494 EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
495 host_info.num_ipv4_components);
496 }
497 }
498 }
427 } 499 }
428 500
429 TEST(URLCanonTest, IPv4) { 501 TEST(URLCanonTest, IPv4) {
430 DualComponentCase cases[] = { 502 IPAddressCase cases[] = {
431 // Empty is not an IP address. 503 // Empty is not an IP address.
432 {"", L"", "", url_parse::Component(), false}, 504 {"", L"", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
433 {".", L".", "", url_parse::Component(), false}, 505 {".", L".", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
434 // Regular IP addresses in different bases. 506 // Regular IP addresses in different bases.
435 {"192.168.0.1", L"192.168.0.1", "192.168.0.1", url_parse::Component(0, 11), true}, 507 {"192.168.0.1", L"192.168.0.1", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 4},
436 {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", url_parse::Component( 0, 11), true}, 508 {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", url_parse::Component( 0, 11), CanonHostInfo::IPV4, 4},
437 {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", url_parse::Compon ent(0, 11), true}, 509 {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", url_parse::Compon ent(0, 11), CanonHostInfo::IPV4, 4},
438 // Non-IP addresses due to invalid characters. 510 // Non-IP addresses due to invalid characters.
439 {"192.168.9.com", L"192.168.9.com", "", url_parse::Component(), false}, 511 {"192.168.9.com", L"192.168.9.com", "", url_parse::Component(), CanonHostInf o::NEUTRAL, -1},
440 // Invalid characters for the base should be rejected. 512 // Invalid characters for the base should be rejected.
441 {"19a.168.0.1", L"19a.168.0.1", "", url_parse::Component(), false}, 513 {"19a.168.0.1", L"19a.168.0.1", "", url_parse::Component(), CanonHostInfo::N EUTRAL, -1},
442 {"0308.0250.00.01", L"0308.0250.00.01", "", url_parse::Component(), false}, 514 {"0308.0250.00.01", L"0308.0250.00.01", "", url_parse::Component(), CanonHos tInfo::NEUTRAL, -1},
443 {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", url_parse::Component(), fals e}, 515 {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", url_parse::Component(), Cano nHostInfo::NEUTRAL, -1},
444 // If there are not enough components, the last one should fill them out. 516 // If there are not enough components, the last one should fill them out.
445 {"192", L"192", "0.0.0.192", url_parse::Component(0, 9), true}, 517 {"192", L"192", "0.0.0.192", url_parse::Component(0, 9), CanonHostInfo::IPV4 , 1},
446 {"0xC0a80001", L"0xC0a80001", "192.168.0.1", url_parse::Component(0, 11), tr ue}, 518 {"0xC0a80001", L"0xC0a80001", "192.168.0.1", url_parse::Component(0, 11), Ca nonHostInfo::IPV4, 1},
447 {"030052000001", L"030052000001", "192.168.0.1", url_parse::Component(0, 11) , true}, 519 {"030052000001", L"030052000001", "192.168.0.1", url_parse::Component(0, 11) , CanonHostInfo::IPV4, 1},
448 {"000030052000001", L"000030052000001", "192.168.0.1", url_parse::Component( 0, 11), true}, 520 {"000030052000001", L"000030052000001", "192.168.0.1", url_parse::Component( 0, 11), CanonHostInfo::IPV4, 1},
449 {"192.168", L"192.168", "192.0.0.168", url_parse::Component(0, 11), true}, 521 {"192.168", L"192.168", "192.0.0.168", url_parse::Component(0, 11), CanonHos tInfo::IPV4, 2},
450 {"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", url_parse::Component(0 , 11), true}, 522 {"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", url_parse::Component(0 , 11), CanonHostInfo::IPV4, 2},
451 {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", url_parse::Component(0, 11), true}, 523 {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 2},
452 {"192.168.1", L"192.168.1", "192.168.0.1", url_parse::Component(0, 11), true }, 524 {"192.168.1", L"192.168.1", "192.168.0.1", url_parse::Component(0, 11), Cano nHostInfo::IPV4, 3},
453 // Too many components means not an IP address. 525 // Too many components means not an IP address.
454 {"192.168.0.0.1", L"192.168.0.0.1", "", url_parse::Component(), false}, 526 {"192.168.0.0.1", L"192.168.0.0.1", "", url_parse::Component(), CanonHostInf o::NEUTRAL, -1},
455 // We allow a single trailing dot. 527 // We allow a single trailing dot.
456 {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", url_parse::Component(0, 11) , true}, 528 {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", url_parse::Component(0, 11) , CanonHostInfo::IPV4, 4},
457 {"192.168.0.1. hello", L"192.168.0.1. hello", "", url_parse::Component(), fa lse}, 529 {"192.168.0.1. hello", L"192.168.0.1. hello", "", url_parse::Component(), Ca nonHostInfo::NEUTRAL, -1},
458 {"192.168.0.1..", L"192.168.0.1..", "", url_parse::Component(), false}, 530 {"192.168.0.1..", L"192.168.0.1..", "", url_parse::Component(), CanonHostInf o::NEUTRAL, -1},
459 // Two dots in a row means not an IP address. 531 // Two dots in a row means not an IP address.
460 {"192.168..1", L"192.168..1", "", url_parse::Component(), false}, 532 {"192.168..1", L"192.168..1", "", url_parse::Component(), CanonHostInfo::NEU TRAL, -1},
461 // Any non-first components get truncated to one byte. 533 // Any numerical overflow should be marked as BROKEN.
462 {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "20.0.162.255", url_pars e::Component(0, 12), true}, 534 {"0x100.0", L"0x100.0", "", url_parse::Component(), CanonHostInfo::BROKEN, - 1},
463 // The last component should get truncated to however much space is 535 {"0x100.0.0", L"0x100.0.0", "", url_parse::Component(), CanonHostInfo::BROKE N, -1},
464 // remaining. 536 {"0x100.0.0.0", L"0x100.0.0.0", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},
465 {"192.168.0.257", L"192.168.0.257", "192.168.0.1", url_parse::Component(0, 1 1), true}, 537 {"0.0x100.0.0", L"0.0x100.0.0", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},
466 {"192.168.0xa20001", L"192.168.0xa20001", "192.168.0.1", url_parse::Componen t(0, 11), true}, 538 {"0.0.0x100.0", L"0.0.0x100.0", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},
467 {"192.015052000001", L"192.015052000001", "192.168.0.1", url_parse::Componen t(0, 11), true}, 539 {"0.0.0.0x100", L"0.0.0.0x100", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},
468 {"0X12C0a80001", L"0X12C0a80001", "192.168.0.1", url_parse::Component(0, 11) , true}, 540 {"0.0.0x10000", L"0.0.0x10000", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},
541 {"0.0x1000000", L"0.0x1000000", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},
542 {"0x100000000", L"0x100000000", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},
543 // Repeat the previous tests, minus 1, to verify boundaries.
544 {"0xFF.0", L"0xFF.0", "255.0.0.0", url_parse::Component(0, 9), CanonHostInfo ::IPV4, 2},
545 {"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", url_parse::Component(0, 9), CanonHost Info::IPV4, 3},
546 {"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", url_parse::Component(0, 9), Canon HostInfo::IPV4, 4},
547 {"0.0xFF.0.0", L"0.0xFF.0.0", "0.255.0.0", url_parse::Component(0, 9), Canon HostInfo::IPV4, 4},
548 {"0.0.0xFF.0", L"0.0.0xFF.0", "0.0.255.0", url_parse::Component(0, 9), Canon HostInfo::IPV4, 4},
549 {"0.0.0.0xFF", L"0.0.0.0xFF", "0.0.0.255", url_parse::Component(0, 9), Canon HostInfo::IPV4, 4},
550 {"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", url_parse::Component(0, 11), Ca nonHostInfo::IPV4, 3},
551 {"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", url_parse::Component(0, 13), CanonHostInfo::IPV4, 2},
552 {"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", url_parse::Component(0, 15) , CanonHostInfo::IPV4, 1},
553 // Old truncation tests. They're all "BROKEN" now.
554 {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", url_parse::Component (), CanonHostInfo::BROKEN, -1},
555 {"192.168.0.257", L"192.168.0.257", "", url_parse::Component(), CanonHostInf o::BROKEN, -1},
556 {"192.168.0xa20001", L"192.168.0xa20001", "", url_parse::Component(), CanonH ostInfo::BROKEN, -1},
557 {"192.015052000001", L"192.015052000001", "", url_parse::Component(), CanonH ostInfo::BROKEN, -1},
558 {"0X12C0a80001", L"0X12C0a80001", "", url_parse::Component(), CanonHostInfo: :BROKEN, -1},
559 {"276.1.2", L"276.1.2", "", url_parse::Component(), CanonHostInfo::BROKEN, - 1},
469 // Spaces should be rejected. 560 // Spaces should be rejected.
470 {"192.168.0.1 hello", L"192.168.0.1 hello", "", url_parse::Component(), fals e}, 561 {"192.168.0.1 hello", L"192.168.0.1 hello", "", url_parse::Component(), Cano nHostInfo::NEUTRAL, -1},
471 // Truncation plus the last component missing. 562 // Very large numbers.
472 {"276.1.2", L"276.1.2", "20.1.0.2", url_parse::Component(0, 8), true}, 563 {"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300 .0x00000000000000fF.00000000000000001", "192.255.0.1", url_parse::Component(0, 1 1), CanonHostInfo::IPV4, 3},
473 // Very large numbers. We support up to 16 characters per component 564 {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300. 0xffffffffFFFFFFFF.3022415481470977", "", url_parse::Component(0, 11), CanonHost Info::BROKEN, -1},
474 // before rejecting. 565 // A number has no length limit, but long numbers can still overflow.
475 {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300. 0xffffffffFFFFFFFF.3022415481470977", "192.255.0.1", url_parse::Component(0, 11) , true}, 566 {"00000000000000000001", L"00000000000000000001", "0.0.0.1", url_parse::Comp onent(0, 7), CanonHostInfo::IPV4, 1},
476 {"000000000000000300.168.1", L"000000000000000300.168.1", "", url_parse::Com ponent(), false}, 567 {"0000000000000000100000000000000001", L"0000000000000000100000000000000001" , "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
568 // If a long component is non-numeric, it's a hostname, *not* a broken IP.
569 {"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", url_parse::C omponent(), CanonHostInfo::NEUTRAL, -1},
570 {"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", url_parse::C omponent(), CanonHostInfo::NEUTRAL, -1},
571 // Truncation of all zeros should still result in 0.
572 {"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", url_parse::Component(0, 7), Canon HostInfo::IPV4, 4},
477 }; 573 };
478 574
479 for (size_t i = 0; i < arraysize(cases); i++) { 575 for (size_t i = 0; i < arraysize(cases); i++) {
480 // 8-bit version. 576 // 8-bit version.
481 url_parse::Component component(0, 577 url_parse::Component component(0,
482 static_cast<int>(strlen(cases[i].input8))); 578 static_cast<int>(strlen(cases[i].input8)));
483 579
484 std::string out_str1; 580 std::string out_str1;
485 url_canon::StdStringCanonOutput output1(&out_str1); 581 url_canon::StdStringCanonOutput output1(&out_str1);
486 url_parse::Component out_ip; 582 url_canon::CanonHostInfo host_info;
487 bool success = url_canon::CanonicalizeIPAddress(cases[i].input8, component, 583 url_canon::CanonicalizeIPAddress(cases[i].input8, component, &output1,
488 &output1, &out_ip); 584 &host_info);
489 output1.Complete(); 585 output1.Complete();
490 586
491 EXPECT_EQ(cases[i].expected_success, success); 587 EXPECT_EQ(cases[i].expected_family, host_info.family);
492 if (success) { 588 if (host_info.family == CanonHostInfo::IPV4) {
493 EXPECT_STREQ(cases[i].expected, out_str1.c_str()); 589 EXPECT_STREQ(cases[i].expected, out_str1.c_str());
494 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin); 590 EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
495 EXPECT_EQ(cases[i].expected_component.len, out_ip.len); 591 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
592 EXPECT_EQ(cases[i].expected_num_ipv4_components,
593 host_info.num_ipv4_components);
496 } 594 }
497 595
498 // 16-bit version. 596 // 16-bit version.
499 string16 input16(WStringToUTF16(cases[i].input16)); 597 string16 input16(WStringToUTF16(cases[i].input16));
500 component = url_parse::Component(0, static_cast<int>(input16.length())); 598 component = url_parse::Component(0, static_cast<int>(input16.length()));
501 599
502 std::string out_str2; 600 std::string out_str2;
503 url_canon::StdStringCanonOutput output2(&out_str2); 601 url_canon::StdStringCanonOutput output2(&out_str2);
504 success = url_canon::CanonicalizeIPAddress(input16.c_str(), component, 602 url_canon::CanonicalizeIPAddress(input16.c_str(), component, &output2,
505 &output2, &out_ip); 603 &host_info);
506 output2.Complete(); 604 output2.Complete();
507 605
508 EXPECT_EQ(cases[i].expected_success, success); 606 EXPECT_EQ(cases[i].expected_family, host_info.family);
509 if (success) { 607 if (host_info.family == CanonHostInfo::IPV4) {
510 EXPECT_STREQ(cases[i].expected, out_str1.c_str()); 608 EXPECT_STREQ(cases[i].expected, out_str2.c_str());
511 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin); 609 EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
512 EXPECT_EQ(cases[i].expected_component.len, out_ip.len); 610 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
611 EXPECT_EQ(cases[i].expected_num_ipv4_components,
612 host_info.num_ipv4_components);
513 } 613 }
514 } 614 }
515 } 615 }
516 616
517 TEST(URLCanonTest, IPv6) { 617 TEST(URLCanonTest, IPv6) {
518 DualComponentCase cases[] = { 618 IPAddressCase cases[] = {
519 // Empty is not an IP address. 619 // Empty is not an IP address.
520 {"", L"", "", url_parse::Component(), false}, 620 {"", L"", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
521 {":", L":", "", url_parse::Component(), false}, 621 // Non-IPs with [:] characters are marked BROKEN.
522 {"[", L"[", "", url_parse::Component(), false}, 622 {":", L":", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
523 {"[:", L"[:", "", url_parse::Component(), false}, 623 {"[", L"[", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
524 {"]", L"]", "", url_parse::Component(), false}, 624 {"[:", L"[:", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
525 {":]", L":]", "", url_parse::Component(), false}, 625 {"]", L"]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
526 {"[]", L"[]", "", url_parse::Component(), false}, 626 {":]", L":]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
527 {"[:]", L"[:]", "", url_parse::Component(), false}, 627 {"[]", L"[]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
628 {"[:]", L"[:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
528 // Regular IP address is invalid without bounding '[' and ']'. 629 // Regular IP address is invalid without bounding '[' and ']'.
529 {"2001:db8::1", L"2001:db8::1", "", url_parse::Component(), false}, 630 {"2001:db8::1", L"2001:db8::1", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
530 {"[2001:db8::1", L"[2001:db8::1", "", url_parse::Component(), false}, 631 {"[2001:db8::1", L"[2001:db8::1", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
531 {"2001:db8::1]", L"2001:db8::1]", "", url_parse::Component(), false}, 632 {"2001:db8::1]", L"2001:db8::1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
532 // Regular IP addresses. 633 // Regular IP addresses.
533 {"[::]", L"[::]", "[::]", url_parse::Component(0,4), true}, 634 {"[::]", L"[::]", "[::]", url_parse::Component(0,4), CanonHostInfo::IPV6, -1 },
534 {"[::1]", L"[::1]", "[::1]", url_parse::Component(0,5), true}, 635 {"[::1]", L"[::1]", "[::1]", url_parse::Component(0,5), CanonHostInfo::IPV6, -1},
535 {"[1::]", L"[1::]", "[1::]", url_parse::Component(0,5), true}, 636 {"[1::]", L"[1::]", "[1::]", url_parse::Component(0,5), CanonHostInfo::IPV6, -1},
536 {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", url_parse::Component(0 ,10), true}, 637 {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", url_parse::Component(0 ,10), CanonHostInfo::IPV6, -1},
537 {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", url_par se::Component(0,15), true}, 638 {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", url_par se::Component(0,15), CanonHostInfo::IPV6, -1},
538 639
539 // Leading zeros should be stripped. 640 // Leading zeros should be stripped.
540 {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4: 5:6:7]", url_parse::Component(0,17), true}, 641 {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4: 5:6:7]", url_parse::Component(0,17), CanonHostInfo::IPV6, -1},
541 642
542 // Upper case letters should be lowercased. 643 // Upper case letters should be lowercased.
543 {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", ur l_parse::Component(0,20), true}, 644 {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", ur l_parse::Component(0,20), CanonHostInfo::IPV6, -1},
544 645
545 // The same address can be written with different contractions, but should 646 // The same address can be written with different contractions, but should
546 // get canonicalized to the same thing. 647 // get canonicalized to the same thing.
547 {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", url_parse::Component (0,14), true}, 648 {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", url_parse::Component (0,14), CanonHostInfo::IPV6, -1},
548 {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", url_parse::Component (0,14), true}, 649 {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", url_parse::Component (0,14), CanonHostInfo::IPV6, -1},
549 650
550 // IPv4 addresses 651 // IPv4 addresses
551 // Only mapped and compat addresses can have IPv4 syntax embedded. 652 // Only mapped and compat addresses can have IPv4 syntax embedded.
552 {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "", url_parse::Component() , false}, 653 {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "", url_parse::Component() , CanonHostInfo::BROKEN, -1},
553 {"[2001::192.168.0.1]", L"[2001::92.168.0.1]", "", url_parse::Component(), f alse}, 654 {"[2001::192.168.0.1]", L"[2001::192.168.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
554 {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", url_parse::Component (), false}, 655 {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", url_parse::Component (), CanonHostInfo::BROKEN, -1},
555 656
556 // IPv4 with truncation and last component missing. 657 // IPv4 with last component missing.
557 {"[::ffff:276.1.2]", L"[::ffff:276.1.2]", "[::ffff:1401:2]", url_parse::Comp onent(0,15), true}, 658 {"[::ffff:192.1.2]", L"[::ffff:192.1.2]", "[::ffff:c001:2]", url_parse::Comp onent(0,15), CanonHostInfo::IPV6, -1},
558 659
559 // IPv4 using hex. 660 // IPv4 using hex.
560 // TODO(eroman): Should this format be disallowed? 661 // TODO(eroman): Should this format be disallowed?
561 {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8: 1]", url_parse::Component(0,15), true}, 662 {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8: 1]", url_parse::Component(0,15), CanonHostInfo::IPV6, -1},
562 663
563 // There may be zeros surrounding the "::" contraction. 664 // There may be zeros surrounding the "::" contraction.
564 {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", url_parse::Component(0,5), true}, 665 {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", url_parse::Component(0,5), CanonH ostInfo::IPV6, -1},
565 666
566 {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", url_parse::Component(0, 13), true}, 667 {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", url_parse::Component(0, 13), CanonHostInfo::IPV6, -1},
567 668
568 // Can only have one "::" contraction in an IPv6 string literal. 669 // Can only have one "::" contraction in an IPv6 string literal.
569 {"[2001::db8::1]", L"[2001::db8::1]", "", url_parse::Component(), false}, 670 {"[2001::db8::1]", L"[2001::db8::1]", "", url_parse::Component(), CanonHostI nfo::BROKEN, -1},
570 // No more than 2 consecutive ':'s. 671 // No more than 2 consecutive ':'s.
571 {"[2001:db8:::1]", L"[2001:db8:::1]", "", url_parse::Component(), false}, 672 {"[2001:db8:::1]", L"[2001:db8:::1]", "", url_parse::Component(), CanonHostI nfo::BROKEN, -1},
572 {"[:::]", L"[:::]", "", url_parse::Component(), false}, 673 {"[:::]", L"[:::]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
573 // Non-IP addresses due to invalid characters. 674 // Non-IP addresses due to invalid characters.
574 {"[2001::.com]", L"[2001::.com]", "", url_parse::Component(), false}, 675 {"[2001::.com]", L"[2001::.com]", "", url_parse::Component(), CanonHostInfo: :BROKEN, -1},
575 // If there are not enough components, the last one should fill them out. 676 // If there are not enough components, the last one should fill them out.
576 // ... omitted at this time ... 677 // ... omitted at this time ...
577 // Too many components means not an IP address. Similarly with too few if using IPv4 compat or mapped addresses. 678 // Too many components means not an IP address. Similarly with too few if using IPv4 compat or mapped addresses.
578 {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", url_parse::Component(), fals e}, 679 {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", url_parse::Component(), Cano nHostInfo::BROKEN, -1},
579 {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", url_parse::Compone nt(), false}, 680 {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", url_parse::Compone nt(), CanonHostInfo::BROKEN, -1},
580 {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", url_parse::Component(), false}, 681 {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
581 // Too many bits (even though 8 components, the last one holds 32 bits). 682 // Too many bits (even though 8 components, the last one holds 32 bits).
582 {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", url_pars e::Component(), false}, 683 {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", url_pars e::Component(), CanonHostInfo::BROKEN, -1},
583 684
584 // Too many bits specified -- the contraction would have to be zero-length 685 // Too many bits specified -- the contraction would have to be zero-length
585 // to not exceed 128 bits. 686 // to not exceed 128 bits.
586 {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", url_parse: :Component(), false}, 687 {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", url_parse: :Component(), CanonHostInfo::BROKEN, -1},
587 688
588 // The contraction is for 16 bits of zero. 689 // The contraction is for 16 bits of zero.
589 {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", url_parse::Co mponent(0,17), true}, 690 {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", url_parse::Co mponent(0,17), CanonHostInfo::IPV6, -1},
590 691
591 // Cannot have a trailing colon. 692 // Cannot have a trailing colon.
592 {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", url_parse::Component(), fa lse}, 693 {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", url_parse::Component(), Ca nonHostInfo::BROKEN, -1},
593 {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", url_parse: :Component(), false}, 694 {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", url_parse: :Component(), CanonHostInfo::BROKEN, -1},
594 695
595 // Cannot have negative numbers. 696 // Cannot have negative numbers.
596 {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", url_parse::Component(), fa lse}, 697 {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", url_parse::Component(), Ca nonHostInfo::BROKEN, -1},
597 698
598 // Scope ID -- the URL may contain an optional ["%" <scope_id>] section. 699 // Scope ID -- the URL may contain an optional ["%" <scope_id>] section.
599 // The scope_id should be included in the canonicalized URL, and is an 700 // The scope_id should be included in the canonicalized URL, and is an
600 // unsigned decimal number. 701 // unsigned decimal number.
601 702
602 // Invalid because no ID was given after the percent. 703 // Invalid because no ID was given after the percent.
603 704
604 // Don't allow scope-id 705 // Don't allow scope-id
605 {"[1::%1]", L"[1::%1]", "", url_parse::Component(), false}, 706 {"[1::%1]", L"[1::%1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
606 {"[1::%eth0]", L"[1::%eth0]", "", url_parse::Component(), false}, 707 {"[1::%eth0]", L"[1::%eth0]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
607 {"[1::%]", L"[1::%]", "", url_parse::Component(), false}, 708 {"[1::%]", L"[1::%]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
608 {"[%]", L"[%]", "", url_parse::Component(), false}, 709 {"[%]", L"[%]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
609 {"[::%:]", L"[::%:]", "", url_parse::Component(), false}, 710 {"[::%:]", L"[::%:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
610 711
611 // Don't allow leading or trailing colons. 712 // Don't allow leading or trailing colons.
612 {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", url_parse::Component(), false}, 713 {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", url_parse::Component(), CanonHostInf o::BROKEN, -1},
613 {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", url_parse::Component(), false}, 714 {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", url_parse::Component(), CanonHostInf o::BROKEN, -1},
614 {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", url_parse::Component(), false}, 715 {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", url_parse::Component(), CanonHostI nfo::BROKEN, -1},
615 716
616 // We allow a single trailing dot. 717 // We allow a single trailing dot.
617 // ... omitted at this time ... 718 // ... omitted at this time ...
618 // Two dots in a row means not an IP address. 719 // Two dots in a row means not an IP address.
619 {"[::192.168..1]", L"[::192.168..1]", "", url_parse::Component(), false}, 720 {"[::192.168..1]", L"[::192.168..1]", "", url_parse::Component(), CanonHostI nfo::BROKEN, -1},
620 // Any non-first components get truncated to one byte. 721 // Any non-first components get truncated to one byte.
621 // ... omitted at this time ... 722 // ... omitted at this time ...
622 // Spaces should be rejected. 723 // Spaces should be rejected.
623 {"[::1 hello]", L"[::1 hello]", "", url_parse::Component(), false}, 724 {"[::1 hello]", L"[::1 hello]", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},
624 }; 725 };
625 726
626 for (size_t i = 0; i < arraysize(cases); i++) { 727 for (size_t i = 0; i < arraysize(cases); i++) {
627 // Print some context of what test we were on, to help debug failures.
628 SCOPED_TRACE(cases[i].input8);
629
630 // 8-bit version. 728 // 8-bit version.
631 url_parse::Component component(0, 729 url_parse::Component component(0,
632 static_cast<int>(strlen(cases[i].input8))); 730 static_cast<int>(strlen(cases[i].input8)));
633 731
634 std::string out_str1; 732 std::string out_str1;
635 url_canon::StdStringCanonOutput output1(&out_str1); 733 url_canon::StdStringCanonOutput output1(&out_str1);
636 url_parse::Component out_ip; 734 url_canon::CanonHostInfo host_info;
637 bool success = url_canon::CanonicalizeIPAddress(cases[i].input8, component, 735 url_canon::CanonicalizeIPAddress(cases[i].input8, component, &output1,
638 &output1, &out_ip); 736 &host_info);
639 output1.Complete(); 737 output1.Complete();
640 738
641 EXPECT_EQ(cases[i].expected_success, success); 739 EXPECT_EQ(cases[i].expected_family, host_info.family);
642 if (success) { 740 if (host_info.family == CanonHostInfo::IPV6) {
643 EXPECT_STREQ(cases[i].expected, out_str1.c_str()); 741 EXPECT_STREQ(cases[i].expected, out_str1.c_str());
644 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin); 742 EXPECT_EQ(cases[i].expected_component.begin,
645 EXPECT_EQ(cases[i].expected_component.len, out_ip.len); 743 host_info.out_host.begin);
744 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
646 } 745 }
647 746
648 // 16-bit version. 747 // 16-bit version.
649 string16 input16(WStringToUTF16(cases[i].input16)); 748 string16 input16(WStringToUTF16(cases[i].input16));
650 component = url_parse::Component(0, static_cast<int>(input16.length())); 749 component = url_parse::Component(0, static_cast<int>(input16.length()));
651 750
652 std::string out_str2; 751 std::string out_str2;
653 url_canon::StdStringCanonOutput output2(&out_str2); 752 url_canon::StdStringCanonOutput output2(&out_str2);
654 success = url_canon::CanonicalizeIPAddress(input16.c_str(), component, 753 url_canon::CanonicalizeIPAddress(input16.c_str(), component, &output2,
655 &output2, &out_ip); 754 &host_info);
656 output2.Complete(); 755 output2.Complete();
657 756
658 EXPECT_EQ(cases[i].expected_success, success); 757 EXPECT_EQ(cases[i].expected_family, host_info.family);
659 if (success) { 758 if (host_info.family == CanonHostInfo::IPV6) {
660 EXPECT_STREQ(cases[i].expected, out_str1.c_str()); 759 EXPECT_STREQ(cases[i].expected, out_str2.c_str());
661 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin); 760 EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
662 EXPECT_EQ(cases[i].expected_component.len, out_ip.len); 761 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
663 } 762 }
664 } 763 }
665 } 764 }
666 765
667 TEST(URLCanonTest, UserInfo) { 766 TEST(URLCanonTest, UserInfo) {
668 // Note that the canonicalizer should escape and treat empty components as 767 // Note that the canonicalizer should escape and treat empty components as
669 // not being there. 768 // not being there.
670 769
671 // We actually parse a full input URL so we can get the initial components. 770 // We actually parse a full input URL so we can get the initial components.
672 struct UserComponentCase { 771 struct UserComponentCase {
(...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after
1071 TEST(URLCanonTest, CanonicalizeStandardURL) { 1170 TEST(URLCanonTest, CanonicalizeStandardURL) {
1072 // The individual component canonicalize tests should have caught the cases 1171 // The individual component canonicalize tests should have caught the cases
1073 // for each of those components. Here, we just need to test that the various 1172 // for each of those components. Here, we just need to test that the various
1074 // parts are included or excluded properly, and have the correct separators. 1173 // parts are included or excluded properly, and have the correct separators.
1075 struct URLCase { 1174 struct URLCase {
1076 const char* input; 1175 const char* input;
1077 const char* expected; 1176 const char* expected;
1078 bool expected_success; 1177 bool expected_success;
1079 } cases[] = { 1178 } cases[] = {
1080 {"http://www.google.com/foo?bar=baz#", "http://www.google.com/foo?bar=baz#", true}, 1179 {"http://www.google.com/foo?bar=baz#", "http://www.google.com/foo?bar=baz#", true},
1180 {"http://[www.google.com]/", "http://[www.google.com]/", false},
1081 {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", false }, 1181 {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", false },
1082 {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", t rue}, 1182 {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", t rue},
1083 {"www.google.com", ":www.google.com/", true}, 1183 {"www.google.com", ":www.google.com/", true},
1084 {"http://192.0x00A80001", "http://192.168.0.1/", true}, 1184 {"http://192.0x00A80001", "http://192.168.0.1/", true},
1085 {"http://www/foo%2Ehtml", "http://www/foo.html", true}, 1185 {"http://www/foo%2Ehtml", "http://www/foo.html", true},
1086 1186
1087 // Backslashes should get converted to forward slashes. 1187 // Backslashes should get converted to forward slashes.
1088 {"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true}, 1188 {"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true},
1089 1189
1090 // Busted refs shouldn't make the whole thing fail. 1190 // Busted refs shouldn't make the whole thing fail.
(...skipping 717 matching lines...) Expand 10 before | Expand all | Expand 10 after
1808 url_canon::StdStringCanonOutput repl_output(&repl_str); 1908 url_canon::StdStringCanonOutput repl_output(&repl_str);
1809 url_canon::ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed) ; 1909 url_canon::ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed) ;
1810 repl_output.Complete(); 1910 repl_output.Complete();
1811 1911
1812 // Generate the expected string and check. 1912 // Generate the expected string and check.
1813 std::string expected("file:///foo?"); 1913 std::string expected("file:///foo?");
1814 for (size_t i = 0; i < new_query.length(); i++) 1914 for (size_t i = 0; i < new_query.length(); i++)
1815 expected.push_back('a'); 1915 expected.push_back('a');
1816 EXPECT_TRUE(expected == repl_str); 1916 EXPECT_TRUE(expected == repl_str);
1817 } 1917 }
OLDNEW
« no previous file with comments | « src/url_canon_ip.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698