Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(447)

Side by Side Diff: url/url_canon_unittest.cc

Issue 2397873002: Reject some previously-escaped chars in hostnames.
Patch Set: Some tests fixed Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « url/url_canon_host.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <errno.h> 5 #include <errno.h>
6 #include <stddef.h> 6 #include <stddef.h>
7 7
8 #include "base/macros.h" 8 #include "base/macros.h"
9 #include "base/strings/utf_string_conversions.h" 9 #include "base/strings/utf_string_conversions.h"
10 #include "testing/gtest/include/gtest/gtest.h" 10 #include "testing/gtest/include/gtest/gtest.h"
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after
287 287
288 EXPECT_EQ(std::string(":"), out_str); 288 EXPECT_EQ(std::string(":"), out_str);
289 EXPECT_EQ(0, out_comp.begin); 289 EXPECT_EQ(0, out_comp.begin);
290 EXPECT_EQ(0, out_comp.len); 290 EXPECT_EQ(0, out_comp.len);
291 } 291 }
292 292
293 TEST(URLCanonTest, Host) { 293 TEST(URLCanonTest, Host) {
294 IPAddressCase host_cases[] = { 294 IPAddressCase host_cases[] = {
295 // Basic canonicalization, uppercase should be converted to lowercase. 295 // Basic canonicalization, uppercase should be converted to lowercase.
296 {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""}, 296 {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
297 // Spaces and some other characters should be escaped. 297 // Spaces and some other characters should fail (used to be allowed).
298 {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", Component(0, 22), CanonHostInfo::NEUTRAL, -1, ""}, 298 {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", Component(0, 22), CanonHostInfo::BROKEN, -1, ""},
299 // Exciting different types of spaces! 299 // Exciting different types of spaces!
300 {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16), CanonHostInfo::NEUTRAL, -1, ""}, 300 {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
301 // Other types of space (no-break, zero-width, zero-width-no-break) are 301 // Other types of space (no-break, zero-width, zero-width-no-break) are
302 // name-prepped away to nothing. 302 // name-prepped away to nothing.
303 {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""}, 303 {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
304 // Ideographic full stop (full-width period for Chinese, etc.) should be 304 // Ideographic full stop (full-width period for Chinese, etc.) should be
305 // treated as a dot. 305 // treated as a dot.
306 {NULL, L"www.foo\x3002" L"bar.com", "www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""}, 306 {NULL, L"www.foo\x3002" L"bar.com", "www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
307 // Invalid unicode characters should fail... 307 // Invalid unicode characters should fail...
308 // ...In wide input, ICU will barf and we'll end up with the input as 308 // ...In wide input, ICU will barf and we'll end up with the input as
309 // escaped UTF-8 (the invalid character should be replaced with the 309 // escaped UTF-8 (the invalid character should be replaced with the
310 // replacement character). 310 // replacement character).
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
354 // ZWNJ (U+200C) and ZWJ (U+200D) are mapped away in UTS 46 transitional 354 // ZWNJ (U+200C) and ZWJ (U+200D) are mapped away in UTS 46 transitional
355 // handling as well as in IDNA 2003. 355 // handling as well as in IDNA 2003.
356 {"a\xe2\x80\x8c" "b\xe2\x80\x8d" "c", L"a\x200c" L"b\x200d" L"c", "abc", 356 {"a\xe2\x80\x8c" "b\xe2\x80\x8d" "c", L"a\x200c" L"b\x200d" L"c", "abc",
357 Component(0, 3), CanonHostInfo::NEUTRAL, -1, ""}, 357 Component(0, 3), CanonHostInfo::NEUTRAL, -1, ""},
358 // ZWJ between Devanagari characters is still mapped away in UTS 46 358 // ZWJ between Devanagari characters is still mapped away in UTS 46
359 // transitional handling. IDNA 2008 would give xn--11bo0mv54g. 359 // transitional handling. IDNA 2008 would give xn--11bo0mv54g.
360 {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c", 360 {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c",
361 L"\x915\x94d\x200d\x91c", "xn--11bo0m", 361 L"\x915\x94d\x200d\x91c", "xn--11bo0m",
362 Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""}, 362 Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
363 // Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b) 363 // Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b)
364 // However, we do allow this at the moment because we don't use 364 // Full-width ASCII is converted to ASCII.
365 // STD3 rules and canonicalize full-width ASCII to ASCII.
366 {"wow\xef\xbc\x81", L"wow\xff01", "wow%21", 365 {"wow\xef\xbc\x81", L"wow\xff01", "wow%21",
367 Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""}, 366 Component(0, 6), CanonHostInfo::BROKEN, -1, ""},
368 // U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c) 367 // U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c)
369 // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2 368 // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
370 {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo", 369 {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo",
371 Component(0, 11), CanonHostInfo::BROKEN, -1, ""}, 370 Component(0, 11), CanonHostInfo::BROKEN, -1, ""},
372 // U+2F868 (CJK Comp) is disallowed. UTS 46, table 4, row (d) 371 // U+2F868 (CJK Comp) is disallowed. UTS 46, table 4, row (d)
373 // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2 372 // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
374 {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn", 373 {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn",
375 "%F0%AF%A1%A8%E5%A7%BB.cn", 374 "%F0%AF%A1%A8%E5%A7%BB.cn",
376 Component(0, 24), CanonHostInfo::BROKEN, -1, ""}, 375 Component(0, 24), CanonHostInfo::BROKEN, -1, ""},
377 // Maps uppercase letters to lower case letters. UTS 46 table 4 row (e) 376 // Maps uppercase letters to lower case letters. UTS 46 table 4 row (e)
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
471 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", 470 {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01",
472 "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, 471 "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3,
473 "C0A80001"}, 472 "C0A80001"},
474 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e", 473 {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e",
475 "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, 474 "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3,
476 "C0A80001"}, 475 "C0A80001"},
477 // Invalid escaping should trigger the regular host error handling. 476 // Invalid escaping should trigger the regular host error handling.
478 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", Component(0, 17), CanonHostInfo::BROKEN, -1, ""}, 477 {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", Component(0, 17), CanonHostInfo::BROKEN, -1, ""},
479 // Something that isn't exactly an IP should get treated as a host and 478 // Something that isn't exactly an IP should get treated as a host and
480 // spaces escaped. 479 // spaces escaped.
481 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, 480 {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", Component(0, 19), CanonHostInfo::BROKEN, -1, ""},
482 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP. 481 // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
483 // These are "0Xc0.0250.01" in fullwidth. 482 // These are "0Xc0.0250.01" in fullwidth.
484 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\xff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, 483 {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\xff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
485 // Broken IP addresses get marked as such. 484 // Broken IP addresses get marked as such.
486 {"192.168.0.257", L"192.168.0.257", "192.168.0.257", Component(0, 13), CanonHostInfo::BROKEN, -1, ""}, 485 {"192.168.0.257", L"192.168.0.257", "192.168.0.257", Component(0, 13), CanonHostInfo::BROKEN, -1, ""},
487 {"[google.com]", L"[google.com]", "[google.com]", Component(0, 12), CanonHostInfo::BROKEN, -1, ""}, 486 {"[google.com]", L"[google.com]", "[google.com]", Component(0, 12), CanonHostInfo::BROKEN, -1, ""},
488 // Cyrillic letter followed by '(' should return punycode for '(' escaped 487 // Cyrillic letter followed by '(' should return punycode for '(' escaped
489 // before punycode string was created. I.e. 488 // before punycode string was created. I.e.
490 // if '(' is escaped after punycode is created we would get xn--%28-8tb 489 // if '(' is escaped after punycode is created we would get xn--%28-8tb
491 // (incorrect). 490 // (incorrect).
492 {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11), 491 {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11),
493 CanonHostInfo::NEUTRAL, -1, ""}, 492 CanonHostInfo::BROKEN, -1, ""},
494 // Address with all hexadecimal characters with leading number of 1<<32 493 // Address with all hexadecimal characters with leading number of 1<<32
495 // or greater and should return NEUTRAL rather than BROKEN if not all 494 // or greater and should return NEUTRAL rather than BROKEN if not all
496 // components are numbers. 495 // components are numbers.
497 {"12345678912345.de", L"12345678912345.de", "12345678912345.de", Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""}, 496 {"12345678912345.de", L"12345678912345.de", "12345678912345.de", Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""},
498 {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de", Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, 497 {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de", Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
499 {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de", "12345678912345.12345678912345.de", Component(0, 32), CanonHostInfo::NEUTRAL, -1, ""}, 498 {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de", "12345678912345.12345678912345.de", Component(0, 32), CanonHostInfo::NEUTRAL, -1, ""},
500 {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de", Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""}, 499 {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de", Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""},
501 {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde", Component(0, 19), CanonHostInfo::BROKEN, -1, ""}, 500 {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde", Component(0, 19), CanonHostInfo::BROKEN, -1, ""},
502 }; 501 };
503 502
(...skipping 1746 matching lines...) Expand 10 before | Expand all | Expand 10 after
2250 }; 2249 };
2251 2250
2252 for (auto& test_case : cases) { 2251 for (auto& test_case : cases) {
2253 SCOPED_TRACE(test_case.scheme); 2252 SCOPED_TRACE(test_case.scheme);
2254 EXPECT_EQ(test_case.expected_port, 2253 EXPECT_EQ(test_case.expected_port,
2255 DefaultPortForScheme(test_case.scheme, strlen(test_case.scheme))); 2254 DefaultPortForScheme(test_case.scheme, strlen(test_case.scheme)));
2256 } 2255 }
2257 } 2256 }
2258 2257
2259 } // namespace url 2258 } // namespace url
OLDNEW
« no previous file with comments | « url/url_canon_host.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698