Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
|
mef
2014/05/01 19:45:15
2014? or this is just an extract of ICU dependenci
mmenke
2014/05/01 21:01:33
This is largely just a separation of the old UrlCa
mef
2014/05/02 15:10:48
sgtm
| |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/macros.h" | |
| 6 #include "testing/gtest/include/gtest/gtest.h" | |
| 7 #include "third_party/icu/source/common/unicode/ucnv.h" | |
| 8 #include "url/url_canon.h" | |
| 9 #include "url/url_canon_icu.h" | |
| 10 #include "url/url_canon_stdstring.h" | |
| 11 #include "url/url_test_utils.h" | |
| 12 | |
| 13 // Some implementations of base/basictypes.h may define ARRAYSIZE. | |
| 14 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro | |
| 15 // which is in our version of basictypes.h. | |
| 16 #ifndef ARRAYSIZE | |
| 17 #define ARRAYSIZE ARRAYSIZE_UNSAFE | |
| 18 #endif | |
| 19 | |
| 20 namespace url { | |
| 21 | |
| 22 using test_utils::WStringToUTF16; | |
| 23 | |
| 24 namespace { | |
| 25 | |
| 26 // Wrapper around a UConverter object that managers creation and destruction. | |
| 27 class UConvScoper { | |
| 28 public: | |
| 29 explicit UConvScoper(const char* charset_name) { | |
| 30 UErrorCode err = U_ZERO_ERROR; | |
| 31 converter_ = ucnv_open(charset_name, &err); | |
| 32 } | |
| 33 | |
| 34 ~UConvScoper() { | |
| 35 if (converter_) | |
| 36 ucnv_close(converter_); | |
| 37 } | |
| 38 | |
| 39 // Returns the converter object, may be NULL. | |
| 40 UConverter* converter() const { return converter_; } | |
| 41 | |
| 42 private: | |
| 43 UConverter* converter_; | |
| 44 }; | |
| 45 | |
| 46 TEST(URLCanonIcuTest, ICUCharsetConverter) { | |
| 47 struct ICUCase { | |
| 48 const wchar_t* input; | |
| 49 const char* encoding; | |
| 50 const char* expected; | |
| 51 } icu_cases[] = { | |
| 52 // UTF-8. | |
| 53 {L"Hello, world", "utf-8", "Hello, world"}, | |
| 54 {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"}, | |
| 55 // Non-BMP UTF-8. | |
| 56 {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"}, | |
| 57 // Big5 | |
| 58 {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"}, | |
| 59 // Unrepresentable character in the destination set. | |
| 60 {L"hello\x4f60\x06de\x597dworld", "big5", | |
| 61 "hello\xa7\x41%26%231758%3B\xa6\x6eworld"}, | |
| 62 }; | |
| 63 | |
| 64 for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) { | |
| 65 UConvScoper conv(icu_cases[i].encoding); | |
| 66 ASSERT_TRUE(conv.converter() != NULL); | |
| 67 ICUCharsetConverter converter(conv.converter()); | |
| 68 | |
| 69 std::string str; | |
| 70 StdStringCanonOutput output(&str); | |
| 71 | |
| 72 base::string16 input_str(WStringToUTF16(icu_cases[i].input)); | |
| 73 int input_len = static_cast<int>(input_str.length()); | |
| 74 converter.ConvertFromUTF16(input_str.c_str(), input_len, &output); | |
| 75 output.Complete(); | |
| 76 | |
| 77 EXPECT_STREQ(icu_cases[i].expected, str.c_str()); | |
| 78 } | |
| 79 | |
| 80 // Test string sizes around the resize boundary for the output to make sure | |
| 81 // the converter resizes as needed. | |
| 82 const int static_size = 16; | |
| 83 UConvScoper conv("utf-8"); | |
| 84 ASSERT_TRUE(conv.converter()); | |
| 85 ICUCharsetConverter converter(conv.converter()); | |
| 86 for (int i = static_size - 2; i <= static_size + 2; i++) { | |
| 87 // Make a string with the appropriate length. | |
| 88 base::string16 input; | |
| 89 for (int ch = 0; ch < i; ch++) | |
| 90 input.push_back('a'); | |
| 91 | |
| 92 RawCanonOutput<static_size> output; | |
| 93 converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()), | |
| 94 &output); | |
| 95 EXPECT_EQ(input.length(), static_cast<size_t>(output.length())); | |
| 96 } | |
| 97 } | |
| 98 | |
| 99 TEST(URLCanonIcuTest, QueryWithConverter) { | |
| 100 struct QueryCase { | |
| 101 const char* input8; | |
| 102 const wchar_t* input16; | |
| 103 const char* encoding; | |
| 104 const char* expected; | |
| 105 } query_cases[] = { | |
| 106 // Regular ASCII case in some different encodings. | |
| 107 {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"}, | |
| 108 {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"}, | |
| 109 {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"}, | |
| 110 // Chinese input/output | |
| 111 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312", | |
| 112 "?q=%C4%E3%BA%C3"}, | |
| 113 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"}, | |
| 114 // Unencodable character in the destination character set should be | |
| 115 // escaped. The escape sequence unescapes to be the entity name: | |
| 116 // "?q=你" | |
| 117 {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1", | |
| 118 "?q=Chinese%26%2365319%3B"}, | |
| 119 }; | |
| 120 | |
| 121 for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) { | |
| 122 Component out_comp; | |
| 123 | |
| 124 UConvScoper conv(query_cases[i].encoding); | |
| 125 ASSERT_TRUE(!query_cases[i].encoding || conv.converter()); | |
| 126 ICUCharsetConverter converter(conv.converter()); | |
| 127 | |
| 128 if (query_cases[i].input8) { | |
| 129 int len = static_cast<int>(strlen(query_cases[i].input8)); | |
| 130 Component in_comp(0, len); | |
| 131 std::string out_str; | |
| 132 | |
| 133 StdStringCanonOutput output(&out_str); | |
| 134 CanonicalizeQuery(query_cases[i].input8, in_comp, &converter, &output, | |
| 135 &out_comp); | |
| 136 output.Complete(); | |
| 137 | |
| 138 EXPECT_EQ(query_cases[i].expected, out_str); | |
| 139 } | |
| 140 | |
| 141 if (query_cases[i].input16) { | |
| 142 base::string16 input16(WStringToUTF16(query_cases[i].input16)); | |
| 143 int len = static_cast<int>(input16.length()); | |
| 144 Component in_comp(0, len); | |
| 145 std::string out_str; | |
| 146 | |
| 147 StdStringCanonOutput output(&out_str); | |
| 148 CanonicalizeQuery(input16.c_str(), in_comp, &converter, &output, | |
| 149 &out_comp); | |
| 150 output.Complete(); | |
| 151 | |
| 152 EXPECT_EQ(query_cases[i].expected, out_str); | |
| 153 } | |
| 154 } | |
| 155 | |
| 156 // Extra test for input with embedded NULL; | |
| 157 std::string out_str; | |
| 158 StdStringCanonOutput output(&out_str); | |
| 159 Component out_comp; | |
| 160 CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp); | |
| 161 output.Complete(); | |
| 162 EXPECT_EQ("?a%20%00z%01", out_str); | |
| 163 } | |
| 164 | |
| 165 } // namespace | |
| 166 | |
| 167 } // namespace url | |
| OLD | NEW |