src/url_canon_unittest.cc - Issue 114050: url_canon: New CanonicalizeHostVerbose() function.

Side by Side Diff: src/url_canon_unittest.cc

Issue 114050: url_canon: New CanonicalizeHostVerbose() function. (Closed) Base URL: http://google-url.googlecode.com/svn/trunk/

Patch Set: Address brettw's comments Created 11 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2007, Google Inc.	1 // Copyright 2007, Google Inc.

2 // All rights reserved.	2 // All rights reserved.

3 //	3 //

4 // Redistribution and use in source and binary forms, with or without	4 // Redistribution and use in source and binary forms, with or without

5 // modification, are permitted provided that the following conditions are	5 // modification, are permitted provided that the following conditions are

6 // met:	6 // met:

7 //	7 //

8 // * Redistributions of source code must retain the above copyright	8 // * Redistributions of source code must retain the above copyright

9 // notice, this list of conditions and the following disclaimer.	9 // notice, this list of conditions and the following disclaimer.

10 // * Redistributions in binary form must reproduce the above	10 // * Redistributions in binary form must reproduce the above

(...skipping 30 matching lines...) Expand all Loading...
41 // Some implementations of base/basictypes.h may define ARRAYSIZE.	41 // Some implementations of base/basictypes.h may define ARRAYSIZE.

42 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro	42 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro

43 // which is in our version of basictypes.h.	43 // which is in our version of basictypes.h.

44 #ifndef ARRAYSIZE	44 #ifndef ARRAYSIZE

45 #define ARRAYSIZE ARRAYSIZE_UNSAFE	45 #define ARRAYSIZE ARRAYSIZE_UNSAFE

46 #endif	46 #endif

47	47

48 using url_test_utils::WStringToUTF16;	48 using url_test_utils::WStringToUTF16;

49 using url_test_utils::ConvertUTF8ToUTF16;	49 using url_test_utils::ConvertUTF8ToUTF16;

50 using url_test_utils::ConvertUTF16ToUTF8;	50 using url_test_utils::ConvertUTF16ToUTF8;

	51 using url_canon::CanonHostInfo;

51	52

52 namespace {	53 namespace {

53	54

54 struct ComponentCase {	55 struct ComponentCase {

55 const char* input;	56 const char* input;

56 const char* expected;	57 const char* expected;

57 url_parse::Component expected_component;	58 url_parse::Component expected_component;

58 bool expected_success;	59 bool expected_success;

59 };	60 };

60	61

61 // ComponentCase but with dual 8-bit/16-bit input. Generally, the unit tests	62 // ComponentCase but with dual 8-bit/16-bit input. Generally, the unit tests

62 // treat each input as optional, and will only try processing if non-NULL.	63 // treat each input as optional, and will only try processing if non-NULL.

63 // The output is always 8-bit.	64 // The output is always 8-bit.

64 struct DualComponentCase {	65 struct DualComponentCase {

65 const char* input8;	66 const char* input8;

66 const wchar_t* input16;	67 const wchar_t* input16;

67 const char* expected;	68 const char* expected;

68 url_parse::Component expected_component;	69 url_parse::Component expected_component;

69 bool expected_success;	70 bool expected_success;

70 };	71 };

71	72

	73 // Test cases for CanonicalizeIPAddress(). The inputs are identical to

	74 // DualComponentCase, but the output has extra CanonHostInfo fields.

	75 struct IPAddressCase {

	76 const char* input8;

	77 const wchar_t* input16;

	78 const char* expected;

	79 url_parse::Component expected_component;

	80

	81 // CanonHostInfo fields, for verbose output.

	82 CanonHostInfo::Family expected_family;

	83 int expected_num_ipv4_components;

	84 };

	85

72 struct ReplaceCase {	86 struct ReplaceCase {

73 const char* base;	87 const char* base;

74 const char* scheme;	88 const char* scheme;

75 const char* username;	89 const char* username;

76 const char* password;	90 const char* password;

77 const char* host;	91 const char* host;

78 const char* port;	92 const char* port;

79 const char* path;	93 const char* path;

80 const char* query;	94 const char* query;

81 const char* ref;	95 const char* ref;

(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
320 EXPECT_TRUE(url_canon::CanonicalizeScheme("", url_parse::Component(0, -1),	334 EXPECT_TRUE(url_canon::CanonicalizeScheme("", url_parse::Component(0, -1),

321 &output, &out_comp));	335 &output, &out_comp));

322 output.Complete();	336 output.Complete();

323	337

324 EXPECT_EQ(std::string(":"), out_str);	338 EXPECT_EQ(std::string(":"), out_str);

325 EXPECT_EQ(0, out_comp.begin);	339 EXPECT_EQ(0, out_comp.begin);

326 EXPECT_EQ(0, out_comp.len);	340 EXPECT_EQ(0, out_comp.len);

327 }	341 }

328	342

329 TEST(URLCanonTest, Host) {	343 TEST(URLCanonTest, Host) {

330 DualComponentCase host_cases[] = {	344 IPAddressCase host_cases[] = {

331 // Basic canonicalization, uppercase should be converted to lowercase.	345 // Basic canonicalization, uppercase should be converted to lowercase.

332 {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", url_parse::Component(0, 10), true},	346 {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", url_parse::Component(0, 10), CanonHostInfo::NEUTRAL, -1},

333 // Spaces and some other characters should be escaped.	347 // Spaces and some other characters should be escaped.

334 {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", url_parse::Component(0, 22), true},	348 {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", url_parse::Component(0, 22), CanonHostInfo::NEUTRAL, -1},

335 // Exciting different types of spaces!	349 // Exciting different types of spaces!

336 {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", url_parse::Component(0, 16), true},	350 {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", url_parse::Component(0, 16), CanonHostInfo::NEUTRAL, -1},

337 // Other types of space (no-break, zero-width, zero-width-no-break) are	351 // Other types of space (no-break, zero-width, zero-width-no-break) are

338 // name-prepped away to nothing.	352 // name-prepped away to nothing.

339 {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", url_parse::Component(0 , 10), true},	353 {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", url_parse::Component(0 , 10), CanonHostInfo::NEUTRAL, -1},

340 // Ideographic full stop (full-width period for Chinese, etc.) should be	354 // Ideographic full stop (full-width period for Chinese, etc.) should be

341 // treated as a dot.	355 // treated as a dot.

342 {NULL, L"www.foo\x3002"L"bar.com", "www.foo.bar.com", url_parse::Component(0 , 15), true},	356 {NULL, L"www.foo\x3002"L"bar.com", "www.foo.bar.com", url_parse::Component(0 , 15), CanonHostInfo::NEUTRAL, -1},

343 // Invalid unicode characters should fail...	357 // Invalid unicode characters should fail...

344 // ...In wide input, ICU will barf and we'll end up with the input as	358 // ...In wide input, ICU will barf and we'll end up with the input as

345 // escaped UTF-8 (the invalid character should be replaced with the	359 // escaped UTF-8 (the invalid character should be replaced with the

346 // replacement character).	360 // replacement character).

347 {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", url_parse::Com ponent(0, 16), false},	361 {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", url_parse::Com ponent(0, 16), CanonHostInfo::BROKEN, -1},

348 // ...This is the same as previous but with the input escaped.	362 // ...This is the same as previous but with the input escaped.

349 {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", url_parse::Com ponent(0, 16), false},	363 {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", url_parse::Com ponent(0, 16), CanonHostInfo::BROKEN, -1},

350 // Test name prepping, fullwidth input should be converted to ASCII and NOT	364 // Test name prepping, fullwidth input should be converted to ASCII and NOT

351 // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.	365 // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.

352 {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", url_parse::C omponent(0, 6), true},	366 {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", url_parse::C omponent(0, 6), CanonHostInfo::NEUTRAL, -1},

353 // Test that fullwidth escaped values are properly name-prepped,	367 // Test that fullwidth escaped values are properly name-prepped,

354 // then converted or rejected.	368 // then converted or rejected.

355 // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)	369 // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)

356 {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.c om", url_parse::Component(0, 5), true},	370 {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.c om", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1},

357 {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.c om", url_parse::Component(0, 5), true},	371 {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.c om", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1},

358 // ...%00 in fullwidth should fail (also as escaped UTF-8 input)	372 // ...%00 in fullwidth should fail (also as escaped UTF-8 input)

359 {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00 .com", url_parse::Component(0, 7), false},	373 {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00 .com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1},

360 {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00 .com", url_parse::Component(0, 7), false},	374 {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00 .com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1},

361 // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN	375 // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN

362 {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x5 97d", "xn--6qqa088eba", url_parse::Component(0, 14), true},	376 {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x5 97d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1} ,

363 // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped	377 // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped

364 // UTF-8 (wide case). The output should be equivalent to the true wide	378 // UTF-8 (wide case). The output should be equivalent to the true wide

365 // character input above).	379 // character input above).

366 {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd", L"%E4%BD%A0%E5%A5%BD\x4f60\x5 97d", "xn--6qqa088eba", url_parse::Component(0, 14), true},	380 {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd", L"%E4%BD%A0%E5%A5%BD\x4f60\x5 97d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1} ,

367 // Invalid escaped characters should fail and the percents should be	381 // Invalid escaped characters should fail and the percents should be

368 // escaped.	382 // escaped.

369 {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", url_parse::Component(0, 10), false},	383 {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", url_parse::Component(0, 10), CanonHo stInfo::BROKEN, -1},

370 // If we get an invalid character that has been escaped.	384 // If we get an invalid character that has been escaped.

371 {"%25", L"%25", "%25", url_parse::Component(0, 3), false},	385 {"%25", L"%25", "%25", url_parse::Component(0, 3), CanonHostInfo::BROKEN, -1 },

372 {"hello%00", L"hello%00", "hello%00", url_parse::Component(0, 8), false},	386 {"hello%00", L"hello%00", "hello%00", url_parse::Component(0, 8), CanonHostI nfo::BROKEN, -1},

373 // Escaped numbers should be treated like IP addresses if they are.	387 // Escaped numbers should be treated like IP addresses if they are.

374 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", "192.1 68.0.1", url_parse::Component(0, 11), true},	388 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", "192.1 68.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3},

375 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1", url_parse::Component(0, 11), true},	389 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3},

376 // Invalid escaping should trigger the regular host error handling.	390 // Invalid escaping should trigger the regular host error handling.

377 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", url_parse::Component(0, 17), false},	391 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", url_parse::Component(0, 17), CanonHostInfo::BROKEN, -1},

378 // Something that isn't exactly an IP should get treated as a host and	392 // Something that isn't exactly an IP should get treated as a host and

379 // spaces escaped.	393 // spaces escaped.

380 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", url_parse ::Component(0, 19), true},	394 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", url_parse ::Component(0, 19), CanonHostInfo::NEUTRAL, -1},

381 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.	395 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.

382 // These are "0Xc0.0250.01" in fullwidth.	396 // These are "0Xc0.0250.01" in fullwidth.

383 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x 92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\x ff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", url_ parse::Component(0, 11), true},	397 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x 92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\x ff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", url_ parse::Component(0, 11), CanonHostInfo::IPV4, 3},

	398 // Broken IP addresses get marked as such.

	399 {"192.168.0.257", L"192.168.0.257", "192.168.0.257", url_parse::Component(0, 13), CanonHostInfo::BROKEN, -1},

	400 {"[google.com]", L"[google.com]", "[google.com]", url_parse::Component(0, 12 ), CanonHostInfo::BROKEN, -1},

384 };	401 };

385	402

	403 // CanonicalizeHost() non-verbose.

386 std::string out_str;	404 std::string out_str;

387 for (size_t i = 0; i < arraysize(host_cases); i++) {	405 for (size_t i = 0; i < arraysize(host_cases); i++) {

388 // Narrow version.	406 // Narrow version.

389 if (host_cases[i].input8) {	407 if (host_cases[i].input8) {

390 int host_len = static_cast<int>(strlen(host_cases[i].input8));	408 int host_len = static_cast<int>(strlen(host_cases[i].input8));

391 url_parse::Component in_comp(0, host_len);	409 url_parse::Component in_comp(0, host_len);

392 url_parse::Component out_comp;	410 url_parse::Component out_comp;

393	411

394 out_str.clear();	412 out_str.clear();

395 url_canon::StdStringCanonOutput output(&out_str);	413 url_canon::StdStringCanonOutput output(&out_str);

396	414

397 bool success = url_canon::CanonicalizeHost(host_cases[i].input8, in_comp,	415 bool success = url_canon::CanonicalizeHost(host_cases[i].input8, in_comp,

398 &output, &out_comp);	416 &output, &out_comp);

399 output.Complete();	417 output.Complete();

400	418

401 EXPECT_EQ(host_cases[i].expected_success, success);	419 EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,

	420 success);

402 EXPECT_EQ(std::string(host_cases[i].expected), out_str);	421 EXPECT_EQ(std::string(host_cases[i].expected), out_str);

403 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);	422 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);

404 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);	423 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);

405 }	424 }

406	425

407 // Wide version.	426 // Wide version.

408 if (host_cases[i].input16) {	427 if (host_cases[i].input16) {

409 string16 input16(WStringToUTF16(host_cases[i].input16));	428 string16 input16(WStringToUTF16(host_cases[i].input16));

410 int host_len = static_cast<int>(input16.length());	429 int host_len = static_cast<int>(input16.length());

411 url_parse::Component in_comp(0, host_len);	430 url_parse::Component in_comp(0, host_len);

412 url_parse::Component out_comp;	431 url_parse::Component out_comp;

413	432

414 out_str.clear();	433 out_str.clear();

415 url_canon::StdStringCanonOutput output(&out_str);	434 url_canon::StdStringCanonOutput output(&out_str);

416	435

417 bool success = url_canon::CanonicalizeHost(input16.c_str(), in_comp,	436 bool success = url_canon::CanonicalizeHost(input16.c_str(), in_comp,

418 &output, &out_comp);	437 &output, &out_comp);

419 output.Complete();	438 output.Complete();

420	439

421 EXPECT_EQ(host_cases[i].expected_success, success);	440 EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,

	441 success);

422 EXPECT_EQ(std::string(host_cases[i].expected), out_str);	442 EXPECT_EQ(std::string(host_cases[i].expected), out_str);

423 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);	443 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);

424 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);	444 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);

425 }	445 }

426 }	446 }

	447

	448 // CanonicalizeHostVerbose()

	449 for (size_t i = 0; i < arraysize(host_cases); i++) {

	450 // Narrow version.

	451 if (host_cases[i].input8) {

	452 int host_len = static_cast<int>(strlen(host_cases[i].input8));

	453 url_parse::Component in_comp(0, host_len);

	454

	455 out_str.clear();

	456 url_canon::StdStringCanonOutput output(&out_str);

	457 CanonHostInfo host_info;

	458

	459 url_canon::CanonicalizeHostVerbose(host_cases[i].input8, in_comp,

	460 &output, &host_info);

	461 output.Complete();

	462

	463 EXPECT_EQ(host_cases[i].expected_family, host_info.family);

	464 EXPECT_EQ(std::string(host_cases[i].expected), out_str);

	465 EXPECT_EQ(host_cases[i].expected_component.begin,

	466 host_info.out_host.begin);

	467 EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);

	468 if (host_cases[i].expected_family == CanonHostInfo::IPV4) {

	469 EXPECT_EQ(host_cases[i].expected_num_ipv4_components,

	470 host_info.num_ipv4_components);

	471 }

	472 }

	473

	474 // Wide version.

	475 if (host_cases[i].input16) {

	476 string16 input16(WStringToUTF16(host_cases[i].input16));

	477 int host_len = static_cast<int>(input16.length());

	478 url_parse::Component in_comp(0, host_len);

	479

	480 out_str.clear();

	481 url_canon::StdStringCanonOutput output(&out_str);

	482 CanonHostInfo host_info;

	483

	484 url_canon::CanonicalizeHostVerbose(input16.c_str(), in_comp,

	485 &output, &host_info);

	486 output.Complete();

	487

	488 EXPECT_EQ(host_cases[i].expected_family, host_info.family);

	489 EXPECT_EQ(std::string(host_cases[i].expected), out_str);

	490 EXPECT_EQ(host_cases[i].expected_component.begin,

	491 host_info.out_host.begin);

	492 EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);

	493 if (host_cases[i].expected_family == CanonHostInfo::IPV4) {

	494 EXPECT_EQ(host_cases[i].expected_num_ipv4_components,

	495 host_info.num_ipv4_components);

	496 }

	497 }

	498 }

427 }	499 }

428	500

429 TEST(URLCanonTest, IPv4) {	501 TEST(URLCanonTest, IPv4) {

430 DualComponentCase cases[] = {	502 IPAddressCase cases[] = {

431 // Empty is not an IP address.	503 // Empty is not an IP address.

432 {"", L"", "", url_parse::Component(), false},	504 {"", L"", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},

433 {".", L".", "", url_parse::Component(), false},	505 {".", L".", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},

434 // Regular IP addresses in different bases.	506 // Regular IP addresses in different bases.

435 {"192.168.0.1", L"192.168.0.1", "192.168.0.1", url_parse::Component(0, 11), true},	507 {"192.168.0.1", L"192.168.0.1", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 4},

436 {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", url_parse::Component( 0, 11), true},	508 {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", url_parse::Component( 0, 11), CanonHostInfo::IPV4, 4},

437 {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", url_parse::Compon ent(0, 11), true},	509 {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", url_parse::Compon ent(0, 11), CanonHostInfo::IPV4, 4},

438 // Non-IP addresses due to invalid characters.	510 // Non-IP addresses due to invalid characters.

439 {"192.168.9.com", L"192.168.9.com", "", url_parse::Component(), false},	511 {"192.168.9.com", L"192.168.9.com", "", url_parse::Component(), CanonHostInf o::NEUTRAL, -1},

440 // Invalid characters for the base should be rejected.	512 // Invalid characters for the base should be rejected.

441 {"19a.168.0.1", L"19a.168.0.1", "", url_parse::Component(), false},	513 {"19a.168.0.1", L"19a.168.0.1", "", url_parse::Component(), CanonHostInfo::N EUTRAL, -1},

442 {"0308.0250.00.01", L"0308.0250.00.01", "", url_parse::Component(), false},	514 {"0308.0250.00.01", L"0308.0250.00.01", "", url_parse::Component(), CanonHos tInfo::NEUTRAL, -1},

443 {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", url_parse::Component(), fals e},	515 {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", url_parse::Component(), Cano nHostInfo::NEUTRAL, -1},

444 // If there are not enough components, the last one should fill them out.	516 // If there are not enough components, the last one should fill them out.

445 {"192", L"192", "0.0.0.192", url_parse::Component(0, 9), true},	517 {"192", L"192", "0.0.0.192", url_parse::Component(0, 9), CanonHostInfo::IPV4 , 1},

446 {"0xC0a80001", L"0xC0a80001", "192.168.0.1", url_parse::Component(0, 11), tr ue},	518 {"0xC0a80001", L"0xC0a80001", "192.168.0.1", url_parse::Component(0, 11), Ca nonHostInfo::IPV4, 1},

447 {"030052000001", L"030052000001", "192.168.0.1", url_parse::Component(0, 11) , true},	519 {"030052000001", L"030052000001", "192.168.0.1", url_parse::Component(0, 11) , CanonHostInfo::IPV4, 1},

448 {"000030052000001", L"000030052000001", "192.168.0.1", url_parse::Component( 0, 11), true},	520 {"000030052000001", L"000030052000001", "192.168.0.1", url_parse::Component( 0, 11), CanonHostInfo::IPV4, 1},

449 {"192.168", L"192.168", "192.0.0.168", url_parse::Component(0, 11), true},	521 {"192.168", L"192.168", "192.0.0.168", url_parse::Component(0, 11), CanonHos tInfo::IPV4, 2},

450 {"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", url_parse::Component(0 , 11), true},	522 {"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", url_parse::Component(0 , 11), CanonHostInfo::IPV4, 2},

451 {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", url_parse::Component(0, 11), true},	523 {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 2},

452 {"192.168.1", L"192.168.1", "192.168.0.1", url_parse::Component(0, 11), true },	524 {"192.168.1", L"192.168.1", "192.168.0.1", url_parse::Component(0, 11), Cano nHostInfo::IPV4, 3},

453 // Too many components means not an IP address.	525 // Too many components means not an IP address.

454 {"192.168.0.0.1", L"192.168.0.0.1", "", url_parse::Component(), false},	526 {"192.168.0.0.1", L"192.168.0.0.1", "", url_parse::Component(), CanonHostInf o::NEUTRAL, -1},

455 // We allow a single trailing dot.	527 // We allow a single trailing dot.

456 {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", url_parse::Component(0, 11) , true},	528 {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", url_parse::Component(0, 11) , CanonHostInfo::IPV4, 4},

457 {"192.168.0.1. hello", L"192.168.0.1. hello", "", url_parse::Component(), fa lse},	529 {"192.168.0.1. hello", L"192.168.0.1. hello", "", url_parse::Component(), Ca nonHostInfo::NEUTRAL, -1},

458 {"192.168.0.1..", L"192.168.0.1..", "", url_parse::Component(), false},	530 {"192.168.0.1..", L"192.168.0.1..", "", url_parse::Component(), CanonHostInf o::NEUTRAL, -1},

459 // Two dots in a row means not an IP address.	531 // Two dots in a row means not an IP address.

460 {"192.168..1", L"192.168..1", "", url_parse::Component(), false},	532 {"192.168..1", L"192.168..1", "", url_parse::Component(), CanonHostInfo::NEU TRAL, -1},

461 // Any non-first components get truncated to one byte.	533 // Any numerical overflow should be marked as BROKEN.

462 {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "20.0.162.255", url_pars e::Component(0, 12), true},	534 {"0x100.0", L"0x100.0", "", url_parse::Component(), CanonHostInfo::BROKEN, - 1},

463 // The last component should get truncated to however much space is	535 {"0x100.0.0", L"0x100.0.0", "", url_parse::Component(), CanonHostInfo::BROKE N, -1},

464 // remaining.	536 {"0x100.0.0.0", L"0x100.0.0.0", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},

465 {"192.168.0.257", L"192.168.0.257", "192.168.0.1", url_parse::Component(0, 1 1), true},	537 {"0.0x100.0.0", L"0.0x100.0.0", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},

466 {"192.168.0xa20001", L"192.168.0xa20001", "192.168.0.1", url_parse::Componen t(0, 11), true},	538 {"0.0.0x100.0", L"0.0.0x100.0", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},

467 {"192.015052000001", L"192.015052000001", "192.168.0.1", url_parse::Componen t(0, 11), true},	539 {"0.0.0.0x100", L"0.0.0.0x100", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},

468 {"0X12C0a80001", L"0X12C0a80001", "192.168.0.1", url_parse::Component(0, 11) , true},	540 {"0.0.0x10000", L"0.0.0x10000", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},

	541 {"0.0x1000000", L"0.0x1000000", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},

	542 {"0x100000000", L"0x100000000", "", url_parse::Component(), CanonHostInfo::B ROKEN, -1},

	543 // Repeat the previous tests, minus 1, to verify boundaries.

	544 {"0xFF.0", L"0xFF.0", "255.0.0.0", url_parse::Component(0, 9), CanonHostInfo ::IPV4, 2},

	545 {"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", url_parse::Component(0, 9), CanonHost Info::IPV4, 3},

	546 {"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", url_parse::Component(0, 9), Canon HostInfo::IPV4, 4},

	547 {"0.0xFF.0.0", L"0.0xFF.0.0", "0.255.0.0", url_parse::Component(0, 9), Canon HostInfo::IPV4, 4},

	548 {"0.0.0xFF.0", L"0.0.0xFF.0", "0.0.255.0", url_parse::Component(0, 9), Canon HostInfo::IPV4, 4},

	549 {"0.0.0.0xFF", L"0.0.0.0xFF", "0.0.0.255", url_parse::Component(0, 9), Canon HostInfo::IPV4, 4},

	550 {"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", url_parse::Component(0, 11), Ca nonHostInfo::IPV4, 3},

	551 {"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", url_parse::Component(0, 13), CanonHostInfo::IPV4, 2},

	552 {"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", url_parse::Component(0, 15) , CanonHostInfo::IPV4, 1},

	553 // Old truncation tests. They're all "BROKEN" now.

	554 {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", url_parse::Component (), CanonHostInfo::BROKEN, -1},

	555 {"192.168.0.257", L"192.168.0.257", "", url_parse::Component(), CanonHostInf o::BROKEN, -1},

	556 {"192.168.0xa20001", L"192.168.0xa20001", "", url_parse::Component(), CanonH ostInfo::BROKEN, -1},

	557 {"192.015052000001", L"192.015052000001", "", url_parse::Component(), CanonH ostInfo::BROKEN, -1},

	558 {"0X12C0a80001", L"0X12C0a80001", "", url_parse::Component(), CanonHostInfo: :BROKEN, -1},

	559 {"276.1.2", L"276.1.2", "", url_parse::Component(), CanonHostInfo::BROKEN, - 1},

469 // Spaces should be rejected.	560 // Spaces should be rejected.

470 {"192.168.0.1 hello", L"192.168.0.1 hello", "", url_parse::Component(), fals e},	561 {"192.168.0.1 hello", L"192.168.0.1 hello", "", url_parse::Component(), Cano nHostInfo::NEUTRAL, -1},

471 // Truncation plus the last component missing.	562 // Very large numbers.

472 {"276.1.2", L"276.1.2", "20.1.0.2", url_parse::Component(0, 8), true},	563 {"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300 .0x00000000000000fF.00000000000000001", "192.255.0.1", url_parse::Component(0, 1 1), CanonHostInfo::IPV4, 3},

473 // Very large numbers. We support up to 16 characters per component	564 {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300. 0xffffffffFFFFFFFF.3022415481470977", "", url_parse::Component(0, 11), CanonHost Info::BROKEN, -1},

474 // before rejecting.	565 // A number has no length limit, but long numbers can still overflow.

475 {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300. 0xffffffffFFFFFFFF.3022415481470977", "192.255.0.1", url_parse::Component(0, 11) , true},	566 {"00000000000000000001", L"00000000000000000001", "0.0.0.1", url_parse::Comp onent(0, 7), CanonHostInfo::IPV4, 1},

476 {"000000000000000300.168.1", L"000000000000000300.168.1", "", url_parse::Com ponent(), false},	567 {"0000000000000000100000000000000001", L"0000000000000000100000000000000001" , "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

	568 // If a long component is non-numeric, it's a hostname, not a broken IP.

	569 {"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", url_parse::C omponent(), CanonHostInfo::NEUTRAL, -1},

	570 {"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", url_parse::C omponent(), CanonHostInfo::NEUTRAL, -1},

	571 // Truncation of all zeros should still result in 0.

	572 {"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", url_parse::Component(0, 7), Canon HostInfo::IPV4, 4},

477 };	573 };

478	574

479 for (size_t i = 0; i < arraysize(cases); i++) {	575 for (size_t i = 0; i < arraysize(cases); i++) {

480 // 8-bit version.	576 // 8-bit version.

481 url_parse::Component component(0,	577 url_parse::Component component(0,

482 static_cast<int>(strlen(cases[i].input8)));	578 static_cast<int>(strlen(cases[i].input8)));

483	579

484 std::string out_str1;	580 std::string out_str1;

485 url_canon::StdStringCanonOutput output1(&out_str1);	581 url_canon::StdStringCanonOutput output1(&out_str1);

486 url_parse::Component out_ip;	582 url_canon::CanonHostInfo host_info;

487 bool success = url_canon::CanonicalizeIPAddress(cases[i].input8, component,	583 url_canon::CanonicalizeIPAddress(cases[i].input8, component, &output1,

488 &output1, &out_ip);	584 &host_info);

489 output1.Complete();	585 output1.Complete();

490	586

491 EXPECT_EQ(cases[i].expected_success, success);	587 EXPECT_EQ(cases[i].expected_family, host_info.family);

492 if (success) {	588 if (host_info.family == CanonHostInfo::IPV4) {

493 EXPECT_STREQ(cases[i].expected, out_str1.c_str());	589 EXPECT_STREQ(cases[i].expected, out_str1.c_str());

494 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin);	590 EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);

495 EXPECT_EQ(cases[i].expected_component.len, out_ip.len);	591 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);

	592 EXPECT_EQ(cases[i].expected_num_ipv4_components,

	593 host_info.num_ipv4_components);

496 }	594 }

497	595

498 // 16-bit version.	596 // 16-bit version.

499 string16 input16(WStringToUTF16(cases[i].input16));	597 string16 input16(WStringToUTF16(cases[i].input16));

500 component = url_parse::Component(0, static_cast<int>(input16.length()));	598 component = url_parse::Component(0, static_cast<int>(input16.length()));

501	599

502 std::string out_str2;	600 std::string out_str2;

503 url_canon::StdStringCanonOutput output2(&out_str2);	601 url_canon::StdStringCanonOutput output2(&out_str2);

504 success = url_canon::CanonicalizeIPAddress(input16.c_str(), component,	602 url_canon::CanonicalizeIPAddress(input16.c_str(), component, &output2,

505 &output2, &out_ip);	603 &host_info);

506 output2.Complete();	604 output2.Complete();

507	605

508 EXPECT_EQ(cases[i].expected_success, success);	606 EXPECT_EQ(cases[i].expected_family, host_info.family);

509 if (success) {	607 if (host_info.family == CanonHostInfo::IPV4) {

510 EXPECT_STREQ(cases[i].expected, out_str1.c_str());	608 EXPECT_STREQ(cases[i].expected, out_str2.c_str());

511 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin);	609 EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);

512 EXPECT_EQ(cases[i].expected_component.len, out_ip.len);	610 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);

	611 EXPECT_EQ(cases[i].expected_num_ipv4_components,

	612 host_info.num_ipv4_components);

513 }	613 }

514 }	614 }

515 }	615 }

516	616

517 TEST(URLCanonTest, IPv6) {	617 TEST(URLCanonTest, IPv6) {

518 DualComponentCase cases[] = {	618 IPAddressCase cases[] = {

519 // Empty is not an IP address.	619 // Empty is not an IP address.

520 {"", L"", "", url_parse::Component(), false},	620 {"", L"", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},

521 {":", L":", "", url_parse::Component(), false},	621 // Non-IPs with [:] characters are marked BROKEN.

522 {"[", L"[", "", url_parse::Component(), false},	622 {":", L":", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

523 {"[:", L"[:", "", url_parse::Component(), false},	623 {"[", L"[", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

524 {"]", L"]", "", url_parse::Component(), false},	624 {"[:", L"[:", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

525 {":]", L":]", "", url_parse::Component(), false},	625 {"]", L"]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

526 {"[]", L"[]", "", url_parse::Component(), false},	626 {":]", L":]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

527 {"[:]", L"[:]", "", url_parse::Component(), false},	627 {"[]", L"[]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

	628 {"[:]", L"[:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

528 // Regular IP address is invalid without bounding '[' and ']'.	629 // Regular IP address is invalid without bounding '[' and ']'.

529 {"2001:db8::1", L"2001:db8::1", "", url_parse::Component(), false},	630 {"2001:db8::1", L"2001:db8::1", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

530 {"[2001:db8::1", L"[2001:db8::1", "", url_parse::Component(), false},	631 {"[2001:db8::1", L"[2001:db8::1", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

531 {"2001:db8::1]", L"2001:db8::1]", "", url_parse::Component(), false},	632 {"2001:db8::1]", L"2001:db8::1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

532 // Regular IP addresses.	633 // Regular IP addresses.

533 {"[::]", L"[::]", "[::]", url_parse::Component(0,4), true},	634 {"[::]", L"[::]", "[::]", url_parse::Component(0,4), CanonHostInfo::IPV6, -1},

534 {"[::1]", L"[::1]", "[::1]", url_parse::Component(0,5), true},	635 {"[::1]", L"[::1]", "[::1]", url_parse::Component(0,5), CanonHostInfo::IPV6, -1},

535 {"[1::]", L"[1::]", "[1::]", url_parse::Component(0,5), true},	636 {"[1::]", L"[1::]", "[1::]", url_parse::Component(0,5), CanonHostInfo::IPV6, -1},

536 {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", url_parse::Component(0,10), true},	637 {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", url_parse::Component(0,10), CanonHostInfo::IPV6, -1},

537 {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", url_parse::Component(0,15), true},	638 {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", url_parse::Component(0,15), CanonHostInfo::IPV6, -1},

538	639

539 // Leading zeros should be stripped.	640 // Leading zeros should be stripped.

540 {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4:5:6:7]", url_parse::Component(0,17), true},	641 {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4:5:6:7]", url_parse::Component(0,17), CanonHostInfo::IPV6, -1},

541	642

542 // Upper case letters should be lowercased.	643 // Upper case letters should be lowercased.

543 {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", url_parse::Component(0,20), true},	644 {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", url_parse::Component(0,20), CanonHostInfo::IPV6, -1},

544	645

545 // The same address can be written with different contractions, but should	646 // The same address can be written with different contractions, but should

546 // get canonicalized to the same thing.	647 // get canonicalized to the same thing.

547 {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", url_parse::Component(0,14), true},	648 {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", url_parse::Component(0,14), CanonHostInfo::IPV6, -1},

548 {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", url_parse::Component(0,14), true},	649 {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", url_parse::Component(0,14), CanonHostInfo::IPV6, -1},

549	650

550 // IPv4 addresses	651 // IPv4 addresses

551 // Only mapped and compat addresses can have IPv4 syntax embedded.	652 // Only mapped and compat addresses can have IPv4 syntax embedded.

552 {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "", url_parse::Component(), false},	653 {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

553 {"[2001::192.168.0.1]", L"[2001::92.168.0.1]", "", url_parse::Component(), false},	654 {"[2001::192.168.0.1]", L"[2001::192.168.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

554 {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", url_parse::Component(), false},	655 {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

555	656

556 // IPv4 with truncation and last component missing.	657 // IPv4 with last component missing.

557 {"[::ffff:276.1.2]", L"[::ffff:276.1.2]", "[::ffff:1401:2]", url_parse::Component(0,15), true},	658 {"[::ffff:192.1.2]", L"[::ffff:192.1.2]", "[::ffff:c001:2]", url_parse::Component(0,15), CanonHostInfo::IPV6, -1},

558	659

559 // IPv4 using hex.	660 // IPv4 using hex.

560 // TODO(eroman): Should this format be disallowed?	661 // TODO(eroman): Should this format be disallowed?

561 {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8:1]", url_parse::Component(0,15), true},	662 {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8:1]", url_parse::Component(0,15), CanonHostInfo::IPV6, -1},

562	663

563 // There may be zeros surrounding the "::" contraction.	664 // There may be zeros surrounding the "::" contraction.

564 {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", url_parse::Component(0,5), true},	665 {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", url_parse::Component(0,5), CanonHostInfo::IPV6, -1},

565	666

566 {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", url_parse::Component(0,13), true},	667 {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", url_parse::Component(0,13), CanonHostInfo::IPV6, -1},

567	668

568 // Can only have one "::" contraction in an IPv6 string literal.	669 // Can only have one "::" contraction in an IPv6 string literal.

569 {"[2001::db8::1]", L"[2001::db8::1]", "", url_parse::Component(), false},	670 {"[2001::db8::1]", L"[2001::db8::1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

570 // No more than 2 consecutive ':'s.	671 // No more than 2 consecutive ':'s.

571 {"[2001:db8:::1]", L"[2001:db8:::1]", "", url_parse::Component(), false},	672 {"[2001:db8:::1]", L"[2001:db8:::1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

572 {"[:::]", L"[:::]", "", url_parse::Component(), false},	673 {"[:::]", L"[:::]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

573 // Non-IP addresses due to invalid characters.	674 // Non-IP addresses due to invalid characters.

574 {"[2001::.com]", L"[2001::.com]", "", url_parse::Component(), false},	675 {"[2001::.com]", L"[2001::.com]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

575 // If there are not enough components, the last one should fill them out.	676 // If there are not enough components, the last one should fill them out.

576 // ... omitted at this time ...	677 // ... omitted at this time ...

577 // Too many components means not an IP address. Similarly with too few if using IPv4 compat or mapped addresses.	678 // Too many components means not an IP address. Similarly with too few if using IPv4 compat or mapped addresses.

578 {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", url_parse::Component(), false},	679 {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

579 {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", url_parse::Component(), false},	680 {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

580 {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", url_parse::Component(), false},	681 {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

581 // Too many bits (even though 8 components, the last one holds 32 bits).	682 // Too many bits (even though 8 components, the last one holds 32 bits).

582 {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", url_parse::Component(), false},	683 {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

583	684

584 // Too many bits specified -- the contraction would have to be zero-length	685 // Too many bits specified -- the contraction would have to be zero-length

585 // to not exceed 128 bits.	686 // to not exceed 128 bits.

586 {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", url_parse::Component(), false},	687 {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

587	688

588 // The contraction is for 16 bits of zero.	689 // The contraction is for 16 bits of zero.

589 {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", url_parse::Component(0,17), true},	690 {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", url_parse::Component(0,17), CanonHostInfo::IPV6, -1},

590	691

591 // Cannot have a trailing colon.	692 // Cannot have a trailing colon.

592 {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", url_parse::Component(), false},	693 {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

593 {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", url_parse::Component(), false},	694 {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

594	695

595 // Cannot have negative numbers.	696 // Cannot have negative numbers.

596 {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", url_parse::Component(), false},	697 {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

597	698

598 // Scope ID -- the URL may contain an optional ["%" <scope_id>] section.	699 // Scope ID -- the URL may contain an optional ["%" <scope_id>] section.

599 // The scope_id should be included in the canonicalized URL, and is an	700 // The scope_id should be included in the canonicalized URL, and is an

600 // unsigned decimal number.	701 // unsigned decimal number.

601	702

602 // Invalid because no ID was given after the percent.	703 // Invalid because no ID was given after the percent.

603	704

604 // Don't allow scope-id	705 // Don't allow scope-id

605 {"[1::%1]", L"[1::%1]", "", url_parse::Component(), false},	706 {"[1::%1]", L"[1::%1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

606 {"[1::%eth0]", L"[1::%eth0]", "", url_parse::Component(), false},	707 {"[1::%eth0]", L"[1::%eth0]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

607 {"[1::%]", L"[1::%]", "", url_parse::Component(), false},	708 {"[1::%]", L"[1::%]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

608 {"[%]", L"[%]", "", url_parse::Component(), false},	709 {"[%]", L"[%]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

609 {"[::%:]", L"[::%:]", "", url_parse::Component(), false},	710 {"[::%:]", L"[::%:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

610	711

611 // Don't allow leading or trailing colons.	712 // Don't allow leading or trailing colons.

612 {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", url_parse::Component(), false},	713 {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

613 {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", url_parse::Component(), false},	714 {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

614 {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", url_parse::Component(), false},	715 {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

615	716

616 // We allow a single trailing dot.	717 // We allow a single trailing dot.

617 // ... omitted at this time ...	718 // ... omitted at this time ...

618 // Two dots in a row means not an IP address.	719 // Two dots in a row means not an IP address.

619 {"[::192.168..1]", L"[::192.168..1]", "", url_parse::Component(), false},	720 {"[::192.168..1]", L"[::192.168..1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

620 // Any non-first components get truncated to one byte.	721 // Any non-first components get truncated to one byte.

621 // ... omitted at this time ...	722 // ... omitted at this time ...

622 // Spaces should be rejected.	723 // Spaces should be rejected.

623 {"[::1 hello]", L"[::1 hello]", "", url_parse::Component(), false},	724 {"[::1 hello]", L"[::1 hello]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},

624 };	725 };

625	726

626 for (size_t i = 0; i < arraysize(cases); i++) {	727 for (size_t i = 0; i < arraysize(cases); i++) {

627 // Print some context of what test we were on, to help debug failures.

628 SCOPED_TRACE(cases[i].input8);

629

630 // 8-bit version.	728 // 8-bit version.

631 url_parse::Component component(0,	729 url_parse::Component component(0,

632 static_cast<int>(strlen(cases[i].input8)));	730 static_cast<int>(strlen(cases[i].input8)));

633	731

634 std::string out_str1;	732 std::string out_str1;

635 url_canon::StdStringCanonOutput output1(&out_str1);	733 url_canon::StdStringCanonOutput output1(&out_str1);

636 url_parse::Component out_ip;	734 url_canon::CanonHostInfo host_info;

637 bool success = url_canon::CanonicalizeIPAddress(cases[i].input8, component,	735 url_canon::CanonicalizeIPAddress(cases[i].input8, component, &output1,

638 &output1, &out_ip);	736 &host_info);

639 output1.Complete();	737 output1.Complete();

640	738

641 EXPECT_EQ(cases[i].expected_success, success);	739 EXPECT_EQ(cases[i].expected_family, host_info.family);

642 if (success) {	740 if (host_info.family == CanonHostInfo::IPV6) {

643 EXPECT_STREQ(cases[i].expected, out_str1.c_str());	741 EXPECT_STREQ(cases[i].expected, out_str1.c_str());

644 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin);	742 EXPECT_EQ(cases[i].expected_component.begin,

645 EXPECT_EQ(cases[i].expected_component.len, out_ip.len);	743 host_info.out_host.begin);

	744 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);

646 }	745 }

647	746

648 // 16-bit version.	747 // 16-bit version.

649 string16 input16(WStringToUTF16(cases[i].input16));	748 string16 input16(WStringToUTF16(cases[i].input16));

650 component = url_parse::Component(0, static_cast<int>(input16.length()));	749 component = url_parse::Component(0, static_cast<int>(input16.length()));

651	750

652 std::string out_str2;	751 std::string out_str2;

653 url_canon::StdStringCanonOutput output2(&out_str2);	752 url_canon::StdStringCanonOutput output2(&out_str2);

654 success = url_canon::CanonicalizeIPAddress(input16.c_str(), component,	753 url_canon::CanonicalizeIPAddress(input16.c_str(), component, &output2,

655 &output2, &out_ip);	754 &host_info);

656 output2.Complete();	755 output2.Complete();

657	756

658 EXPECT_EQ(cases[i].expected_success, success);	757 EXPECT_EQ(cases[i].expected_family, host_info.family);

659 if (success) {	758 if (host_info.family == CanonHostInfo::IPV6) {

660 EXPECT_STREQ(cases[i].expected, out_str1.c_str());	759 EXPECT_STREQ(cases[i].expected, out_str2.c_str());

661 EXPECT_EQ(cases[i].expected_component.begin, out_ip.begin);	760 EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);

662 EXPECT_EQ(cases[i].expected_component.len, out_ip.len);	761 EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);

663 }	762 }

664 }	763 }

665 }	764 }

666	765

667 TEST(URLCanonTest, UserInfo) {	766 TEST(URLCanonTest, UserInfo) {

668 // Note that the canonicalizer should escape and treat empty components as	767 // Note that the canonicalizer should escape and treat empty components as

669 // not being there.	768 // not being there.

670	769

671 // We actually parse a full input URL so we can get the initial components.	770 // We actually parse a full input URL so we can get the initial components.

672 struct UserComponentCase {	771 struct UserComponentCase {

(...skipping 398 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1071 TEST(URLCanonTest, CanonicalizeStandardURL) {	1170 TEST(URLCanonTest, CanonicalizeStandardURL) {

1072 // The individual component canonicalize tests should have caught the cases	1171 // The individual component canonicalize tests should have caught the cases

1073 // for each of those components. Here, we just need to test that the various	1172 // for each of those components. Here, we just need to test that the various

1074 // parts are included or excluded properly, and have the correct separators.	1173 // parts are included or excluded properly, and have the correct separators.

1075 struct URLCase {	1174 struct URLCase {

1076 const char* input;	1175 const char* input;

1077 const char* expected;	1176 const char* expected;

1078 bool expected_success;	1177 bool expected_success;

1079 } cases[] = {	1178 } cases[] = {

1080 {"http://www.google.com/foo?bar=baz#", "http://www.google.com/foo?bar=baz#", true},	1179 {"http://www.google.com/foo?bar=baz#", "http://www.google.com/foo?bar=baz#", true},

	1180 {"http://[www.google.com]/", "http://[www.google.com]/", false},

1081 {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", false},	1181 {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", false},

1082 {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", true},	1182 {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", true},

1083 {"www.google.com", ":www.google.com/", true},	1183 {"www.google.com", ":www.google.com/", true},

1084 {"http://192.0x00A80001", "http://192.168.0.1/", true},	1184 {"http://192.0x00A80001", "http://192.168.0.1/", true},

1085 {"http://www/foo%2Ehtml", "http://www/foo.html", true},	1185 {"http://www/foo%2Ehtml", "http://www/foo.html", true},

1086	1186

1087 // Backslashes should get converted to forward slashes.	1187 // Backslashes should get converted to forward slashes.

1088 {"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true},	1188 {"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true},

1089	1189

1090 // Busted refs shouldn't make the whole thing fail.	1190 // Busted refs shouldn't make the whole thing fail.

(...skipping 717 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1808 url_canon::StdStringCanonOutput repl_output(&repl_str);	1908 url_canon::StdStringCanonOutput repl_output(&repl_str);

1809 url_canon::ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed);	1909 url_canon::ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed);

1810 repl_output.Complete();	1910 repl_output.Complete();

1811	1911

1812 // Generate the expected string and check.	1912 // Generate the expected string and check.

1813 std::string expected("file:///foo?");	1913 std::string expected("file:///foo?");

1814 for (size_t i = 0; i < new_query.length(); i++)	1914 for (size_t i = 0; i < new_query.length(); i++)

1815 expected.push_back('a');	1915 expected.push_back('a');

1816 EXPECT_TRUE(expected == repl_str);	1916 EXPECT_TRUE(expected == repl_str);

1817 }	1917 }

OLD	NEW

« no previous file with comments | « src/url_canon_ip.cc ('k') | no next file » | no next file with comments »