OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <errno.h> | 5 #include <errno.h> |
6 | 6 |
7 #include "testing/gtest/include/gtest/gtest.h" | 7 #include "testing/gtest/include/gtest/gtest.h" |
8 #include "third_party/icu/source/common/unicode/ucnv.h" | 8 #include "third_party/icu/source/common/unicode/ucnv.h" |
9 #include "url/url_canon.h" | 9 #include "url/url_canon.h" |
10 #include "url/url_canon_icu.h" | 10 #include "url/url_canon_icu.h" |
(...skipping 381 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
392 // Test that fullwidth escaped values are properly name-prepped, | 392 // Test that fullwidth escaped values are properly name-prepped, |
393 // then converted or rejected. | 393 // then converted or rejected. |
394 // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input) | 394 // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input) |
395 {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.c
om", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""}, | 395 {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.c
om", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""}, |
396 {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.c
om", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""}, | 396 {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.c
om", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""}, |
397 // ...%00 in fullwidth should fail (also as escaped UTF-8 input) | 397 // ...%00 in fullwidth should fail (also as escaped UTF-8 input) |
398 {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00
.com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1, ""}, | 398 {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00
.com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1, ""}, |
399 {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00
.com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1, ""}, | 399 {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00
.com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1, ""}, |
400 // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN | 400 // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN |
401 {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x5
97d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1,
""}, | 401 {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x5
97d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1,
""}, |
| 402 // See http://unicode.org/cldr/utility/idna.jsp for other |
| 403 // examples/experiments and http://goo.gl/7yG11o |
| 404 // for the full list of characters handled differently by |
| 405 // IDNA 2003, UTS 46 (http://unicode.org/reports/tr46/ ) and IDNA 2008. |
| 406 |
| 407 // 4 Deviation characters are mapped/ignored in UTS 46 transitional |
| 408 // mechanism. UTS 46, table 4 row (g). |
| 409 // Sharp-s is mapped to 'ss' in UTS 46 and IDNA 2003. |
| 410 // Otherwise, it'd be "xn--fuball-cta.de". |
| 411 {"fu\xc3\x9f" "ball.de", L"fu\x00df" L"ball.de", "fussball.de", |
| 412 url_parse::Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""}, |
| 413 // Final-sigma (U+03C2) is mapped to regular sigma (U+03C3). |
| 414 // Otherwise, it'd be "xn--wxaijb9b". |
| 415 {"\xcf\x83\xcf\x8c\xce\xbb\xce\xbf\xcf\x82", L"\x3c3\x3cc\x3bb\x3bf\x3c2", |
| 416 "xn--wxaikc6b", url_parse::Component(0, 12), |
| 417 CanonHostInfo::NEUTRAL, -1, ""}, |
| 418 // ZWNJ (U+200C) and ZWJ (U+200D) are mapped away in UTS 46 transitional |
| 419 // handling as well as in IDNA 2003. |
| 420 {"a\xe2\x80\x8c" "b\xe2\x80\x8d" "c", L"a\x200c" L"b\x200d" L"c", "abc", |
| 421 url_parse::Component(0, 3), CanonHostInfo::NEUTRAL, -1, ""}, |
| 422 // ZWJ between Devanagari characters is still mapped away in UTS 46 |
| 423 // transitional handling. IDNA 2008 would give xn--11bo0mv54g. |
| 424 {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c", |
| 425 L"\x915\x94d\x200d\x91c", "xn--11bo0m", |
| 426 url_parse::Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""}, |
| 427 // Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b) |
| 428 // However, we do allow this at the moment because we don't use |
| 429 // STD3 rules and canonicalize full-width ASCII to ASCII. |
| 430 {"wow\xef\xbc\x81", L"wow\xff01", "wow%21", |
| 431 url_parse::Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""}, |
| 432 // U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c) |
| 433 // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2 |
| 434 {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo", |
| 435 url_parse::Component(0, 11), CanonHostInfo::BROKEN, -1, ""}, |
| 436 // U+2F868 (CJK Comp) is disallowed. UTS 46, table 4, row (d) |
| 437 // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2 |
| 438 {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn", |
| 439 "%F0%AF%A1%A8%E5%A7%BB.cn", |
| 440 url_parse::Component(0, 24), CanonHostInfo::BROKEN, -1, ""}, |
| 441 // Maps uppercase letters to lower case letters. UTS 46 table 4 row (e) |
| 442 {"M\xc3\x9cNCHEN", L"M\xdcNCHEN", "xn--mnchen-3ya", |
| 443 url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""}, |
| 444 // Symbol/punctuations are allowed in IDNA 2003/UTS46. |
| 445 // Not allowed in IDNA 2008. UTS 46 table 4 row (f). |
| 446 {"\xe2\x99\xa5ny.us", L"\x2665ny.us", "xn--ny-s0x.us", |
| 447 url_parse::Component(0, 13), CanonHostInfo::NEUTRAL, -1, ""}, |
| 448 // U+11013 is new in Unicode 6.0 and is allowed. UTS 46 table 4, row (h) |
| 449 // We used to allow it because we passed through unassigned code points. |
| 450 {"\xf0\x91\x80\x93.com", L"\xd804\xdc13.com", "xn--n00d.com", |
| 451 url_parse::Component(0, 12), CanonHostInfo::NEUTRAL, -1, ""}, |
| 452 // U+0602 is disallowed in UTS46/IDNA 2008. UTS 46 table 4, row(i) |
| 453 // Used to be allowed in IDNA 2003. |
| 454 {"\xd8\x82.eg", L"\x602.eg", "%D8%82.eg", |
| 455 url_parse::Component(0, 9), CanonHostInfo::BROKEN, -1, ""}, |
| 456 // U+20B7 is new in Unicode 5.2 (not a part of IDNA 2003 based |
| 457 // on Unicode 3.2). We did allow it in the past because we let unassigned |
| 458 // code point pass. We continue to allow it even though it's a |
| 459 // "punctuation and symbol" blocked in IDNA 2008. |
| 460 // UTS 46 table 4, row (j) |
| 461 {"\xe2\x82\xb7.com", L"\x20b7.com", "xn--wzg.com", |
| 462 url_parse::Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""}, |
| 463 // Maps uppercase letters to lower case letters. |
| 464 // In IDNA 2003, it's allowed without case-folding |
| 465 // ( xn--bc-7cb.com ) because it's not defined in Unicode 3.2 |
| 466 // (added in Unicode 4.1). UTS 46 table 4 row (k) |
| 467 {"bc\xc8\xba.com", L"bc\x23a.com", "xn--bc-is1a.com", |
| 468 url_parse::Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""}, |
| 469 // BiDi check test |
| 470 // "Divehi" in Divehi (Thaana script) ends with BidiClass=NSM. |
| 471 // Disallowed in IDNA 2003 but now allowed in UTS 46/IDNA 2008. |
| 472 {"\xde\x8b\xde\xa8\xde\x88\xde\xac\xde\x80\xde\xa8", |
| 473 L"\x78b\x7a8\x788\x7ac\x780\x7a8", "xn--hqbpi0jcw", |
| 474 url_parse::Component(0, 13), CanonHostInfo::NEUTRAL, -1, ""}, |
| 475 // Disallowed in both IDNA 2003 and 2008 with BiDi check. |
| 476 // Labels starting with a RTL character cannot end with a LTR character. |
| 477 {"\xd8\xac\xd8\xa7\xd8\xb1xyz", L"\x62c\x627\x631xyz", |
| 478 "%D8%AC%D8%A7%D8%B1xyz", url_parse::Component(0, 21), |
| 479 CanonHostInfo::BROKEN, -1, ""}, |
| 480 // Labels starting with a RTL character can end with BC=EN (European |
| 481 // number). Disallowed in IDNA 2003 but now allowed. |
| 482 {"\xd8\xac\xd8\xa7\xd8\xb1" "2", L"\x62c\x627\x631" L"2", |
| 483 "xn--2-ymcov", url_parse::Component(0, 11), |
| 484 CanonHostInfo::NEUTRAL, -1, ""}, |
| 485 // Labels starting with a RTL character cannot have "L" characters |
| 486 // even if it ends with an BC=EN. Disallowed in both IDNA 2003/2008. |
| 487 {"\xd8\xac\xd8\xa7\xd8\xb1xy2", L"\x62c\x627\x631xy2", |
| 488 "%D8%AC%D8%A7%D8%B1xy2", url_parse::Component(0, 21), |
| 489 CanonHostInfo::BROKEN, -1, ""}, |
| 490 // Labels starting with a RTL character can end with BC=AN (Arabic number) |
| 491 // Disallowed in IDNA 2003, but now allowed. |
| 492 {"\xd8\xac\xd8\xa7\xd8\xb1\xd9\xa2", L"\x62c\x627\x631\x662", |
| 493 "xn--mgbjq0r", url_parse::Component(0, 11), |
| 494 CanonHostInfo::NEUTRAL, -1, ""}, |
| 495 // Labels starting with a RTL character cannot have "L" characters |
| 496 // even if it ends with an BC=AN (Arabic number). |
| 497 // Disallowed in both IDNA 2003/2008. |
| 498 {"\xd8\xac\xd8\xa7\xd8\xb1xy\xd9\xa2", L"\x62c\x627\x631xy\x662", |
| 499 "%D8%AC%D8%A7%D8%B1xy%D9%A2", url_parse::Component(0, 26), |
| 500 CanonHostInfo::BROKEN, -1, ""}, |
| 501 // Labels starting with a RTL character cannot mix BC=EN and BC=AN |
| 502 {"\xd8\xac\xd8\xa7\xd8\xb1xy2\xd9\xa2", L"\x62c\x627\x631xy2\x662", |
| 503 "%D8%AC%D8%A7%D8%B1xy2%D9%A2", url_parse::Component(0, 27), |
| 504 CanonHostInfo::BROKEN, -1, ""}, |
| 505 // As of Unicode 6.2, U+20CF is not assigned. We do not allow it. |
| 506 {"\xe2\x83\x8f.com", L"\x20cf.com", "%E2%83%8F.com", |
| 507 url_parse::Component(0, 13), CanonHostInfo::BROKEN, -1, ""}, |
| 508 // U+0080 is not allowed. |
| 509 {"\xc2\x80.com", L"\x80.com", "%C2%80.com", |
| 510 url_parse::Component(0, 10), CanonHostInfo::BROKEN, -1, ""}, |
| 511 // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped |
402 // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped | 512 // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped |
403 // UTF-8 (wide case). The output should be equivalent to the true wide | 513 // UTF-8 (wide case). The output should be equivalent to the true wide |
404 // character input above). | 514 // character input above). |
405 {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd", L"%E4%BD%A0%E5%A5%BD\x4f60\x5
97d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1,
""}, | 515 {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd", |
| 516 L"%E4%BD%A0%E5%A5%BD\x4f60\x597d", "xn--6qqa088eba", |
| 517 url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""}, |
406 // Invalid escaped characters should fail and the percents should be | 518 // Invalid escaped characters should fail and the percents should be |
407 // escaped. | 519 // escaped. |
408 {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", url_parse::Component(0, 10), CanonHo
stInfo::BROKEN, -1, ""}, | 520 {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", url_parse::Component(0, 10), |
| 521 CanonHostInfo::BROKEN, -1, ""}, |
409 // If we get an invalid character that has been escaped. | 522 // If we get an invalid character that has been escaped. |
410 {"%25", L"%25", "%25", url_parse::Component(0, 3), CanonHostInfo::BROKEN, -1
, ""}, | 523 {"%25", L"%25", "%25", url_parse::Component(0, 3), |
411 {"hello%00", L"hello%00", "hello%00", url_parse::Component(0, 8), CanonHostI
nfo::BROKEN, -1, ""}, | 524 CanonHostInfo::BROKEN, -1, ""}, |
| 525 {"hello%00", L"hello%00", "hello%00", url_parse::Component(0, 8), |
| 526 CanonHostInfo::BROKEN, -1, ""}, |
412 // Escaped numbers should be treated like IP addresses if they are. | 527 // Escaped numbers should be treated like IP addresses if they are. |
413 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", "192.1
68.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, | 528 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", |
414 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e",
"192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, | 529 "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3, |
| 530 "C0A80001"}, |
| 531 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e", |
| 532 "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3, |
| 533 "C0A80001"}, |
415 // Invalid escaping should trigger the regular host error handling. | 534 // Invalid escaping should trigger the regular host error handling. |
416 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01",
"%253gxc0.0250..01", url_parse::Component(0, 17), CanonHostInfo::BROKEN, -1, ""}
, | 535 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01",
"%253gxc0.0250..01", url_parse::Component(0, 17), CanonHostInfo::BROKEN, -1, ""}
, |
417 // Something that isn't exactly an IP should get treated as a host and | 536 // Something that isn't exactly an IP should get treated as a host and |
418 // spaces escaped. | 537 // spaces escaped. |
419 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", url_parse
::Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, | 538 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", url_parse
::Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, |
420 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP. | 539 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP. |
421 // These are "0Xc0.0250.01" in fullwidth. | 540 // These are "0Xc0.0250.01" in fullwidth. |
422 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x
92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\x
ff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", url_
parse::Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, | 541 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x
92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\x
ff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", url_
parse::Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, |
423 // Broken IP addresses get marked as such. | 542 // Broken IP addresses get marked as such. |
424 {"192.168.0.257", L"192.168.0.257", "192.168.0.257", url_parse::Component(0,
13), CanonHostInfo::BROKEN, -1, ""}, | 543 {"192.168.0.257", L"192.168.0.257", "192.168.0.257", url_parse::Component(0,
13), CanonHostInfo::BROKEN, -1, ""}, |
425 {"[google.com]", L"[google.com]", "[google.com]", url_parse::Component(0, 12
), CanonHostInfo::BROKEN, -1, ""}, | 544 {"[google.com]", L"[google.com]", "[google.com]", url_parse::Component(0, 12
), CanonHostInfo::BROKEN, -1, ""}, |
426 // Cyrillic letter followed buy ( should return punicode for ( escaped bef
ore punicode string was created. I.e. | 545 // Cyrillic letter followed by '(' should return punycode for '(' escaped |
427 // if ( is escaped after punicode is created we would get xn--%28-8tb (inc
orrect). | 546 // before punycode string was created. I.e. |
428 {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", url_parse::Component(0, 11), CanonH
ostInfo::NEUTRAL, -1, ""}, | 547 // if '(' is escaped after punycode is created we would get xn--%28-8tb |
| 548 // (incorrect). |
| 549 {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", url_parse::Component(0, 11), |
| 550 CanonHostInfo::NEUTRAL, -1, ""}, |
429 // Address with all hexidecimal characters with leading number of 1<<32 | 551 // Address with all hexidecimal characters with leading number of 1<<32 |
430 // or greater and should return NEUTRAL rather than BROKEN if not all | 552 // or greater and should return NEUTRAL rather than BROKEN if not all |
431 // components are numbers. | 553 // components are numbers. |
432 {"12345678912345.de", L"12345678912345.de", "12345678912345.de", url_parse::
Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""}, | 554 {"12345678912345.de", L"12345678912345.de", "12345678912345.de", url_parse::
Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""}, |
433 {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de", url_p
arse::Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, | 555 {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de", url_p
arse::Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, |
434 {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de", "1
2345678912345.12345678912345.de", url_parse::Component(0, 32), CanonHostInfo::NE
UTRAL, -1, ""}, | 556 {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de", "1
2345678912345.12345678912345.de", url_parse::Component(0, 32), CanonHostInfo::NE
UTRAL, -1, ""}, |
435 {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de", ur
l_parse::Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""}, | 557 {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de", ur
l_parse::Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""}, |
436 {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde", url_p
arse::Component(0, 19), CanonHostInfo::BROKEN, -1, ""}, | 558 {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde", url_p
arse::Component(0, 19), CanonHostInfo::BROKEN, -1, ""}, |
437 }; | 559 }; |
438 | 560 |
439 // CanonicalizeHost() non-verbose. | 561 // CanonicalizeHost() non-verbose. |
440 std::string out_str; | 562 std::string out_str; |
441 for (size_t i = 0; i < arraysize(host_cases); i++) { | 563 for (size_t i = 0; i < arraysize(host_cases); i++) { |
442 // Narrow version. | 564 // Narrow version. |
443 if (host_cases[i].input8) { | 565 if (host_cases[i].input8) { |
444 int host_len = static_cast<int>(strlen(host_cases[i].input8)); | 566 int host_len = static_cast<int>(strlen(host_cases[i].input8)); |
445 url_parse::Component in_comp(0, host_len); | 567 url_parse::Component in_comp(0, host_len); |
446 url_parse::Component out_comp; | 568 url_parse::Component out_comp; |
447 | 569 |
448 out_str.clear(); | 570 out_str.clear(); |
449 url_canon::StdStringCanonOutput output(&out_str); | 571 url_canon::StdStringCanonOutput output(&out_str); |
450 | 572 |
451 bool success = url_canon::CanonicalizeHost(host_cases[i].input8, in_comp, | 573 bool success = url_canon::CanonicalizeHost(host_cases[i].input8, in_comp, |
452 &output, &out_comp); | 574 &output, &out_comp); |
453 output.Complete(); | 575 output.Complete(); |
454 | 576 |
455 EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN, | 577 EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN, |
456 success); | 578 success) << "for input: " << host_cases[i].input8; |
457 EXPECT_EQ(std::string(host_cases[i].expected), out_str); | 579 EXPECT_EQ(std::string(host_cases[i].expected), out_str) << |
458 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin); | 580 "for input: " << host_cases[i].input8; |
459 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len); | 581 EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin) << |
| 582 "for input: " << host_cases[i].input8; |
| 583 EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len) << |
| 584 "for input: " << host_cases[i].input8; |
460 } | 585 } |
461 | 586 |
462 // Wide version. | 587 // Wide version. |
463 if (host_cases[i].input16) { | 588 if (host_cases[i].input16) { |
464 base::string16 input16(WStringToUTF16(host_cases[i].input16)); | 589 base::string16 input16(WStringToUTF16(host_cases[i].input16)); |
465 int host_len = static_cast<int>(input16.length()); | 590 int host_len = static_cast<int>(input16.length()); |
466 url_parse::Component in_comp(0, host_len); | 591 url_parse::Component in_comp(0, host_len); |
467 url_parse::Component out_comp; | 592 url_parse::Component out_comp; |
468 | 593 |
469 out_str.clear(); | 594 out_str.clear(); |
(...skipping 1634 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2104 url_canon::StdStringCanonOutput repl_output(&repl_str); | 2229 url_canon::StdStringCanonOutput repl_output(&repl_str); |
2105 url_canon::ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed)
; | 2230 url_canon::ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed)
; |
2106 repl_output.Complete(); | 2231 repl_output.Complete(); |
2107 | 2232 |
2108 // Generate the expected string and check. | 2233 // Generate the expected string and check. |
2109 std::string expected("file:///foo?"); | 2234 std::string expected("file:///foo?"); |
2110 for (size_t i = 0; i < new_query.length(); i++) | 2235 for (size_t i = 0; i < new_query.length(); i++) |
2111 expected.push_back('a'); | 2236 expected.push_back('a'); |
2112 EXPECT_TRUE(expected == repl_str); | 2237 EXPECT_TRUE(expected == repl_str); |
2113 } | 2238 } |
OLD | NEW |