OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
mef
2014/05/01 19:45:15
2014? or this is just an extract of ICU dependenci
mmenke
2014/05/01 21:01:33
This is largely just a separation of the old UrlCa
mef
2014/05/02 15:10:48
sgtm
| |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "base/macros.h" | |
6 #include "testing/gtest/include/gtest/gtest.h" | |
7 #include "third_party/icu/source/common/unicode/ucnv.h" | |
8 #include "url/url_canon.h" | |
9 #include "url/url_canon_icu.h" | |
10 #include "url/url_canon_stdstring.h" | |
11 #include "url/url_test_utils.h" | |
12 | |
13 // Some implementations of base/basictypes.h may define ARRAYSIZE. | |
14 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro | |
15 // which is in our version of basictypes.h. | |
16 #ifndef ARRAYSIZE | |
17 #define ARRAYSIZE ARRAYSIZE_UNSAFE | |
18 #endif | |
19 | |
20 namespace url { | |
21 | |
22 using test_utils::WStringToUTF16; | |
23 | |
24 namespace { | |
25 | |
26 // Wrapper around a UConverter object that managers creation and destruction. | |
27 class UConvScoper { | |
28 public: | |
29 explicit UConvScoper(const char* charset_name) { | |
30 UErrorCode err = U_ZERO_ERROR; | |
31 converter_ = ucnv_open(charset_name, &err); | |
32 } | |
33 | |
34 ~UConvScoper() { | |
35 if (converter_) | |
36 ucnv_close(converter_); | |
37 } | |
38 | |
39 // Returns the converter object, may be NULL. | |
40 UConverter* converter() const { return converter_; } | |
41 | |
42 private: | |
43 UConverter* converter_; | |
44 }; | |
45 | |
46 TEST(URLCanonIcuTest, ICUCharsetConverter) { | |
47 struct ICUCase { | |
48 const wchar_t* input; | |
49 const char* encoding; | |
50 const char* expected; | |
51 } icu_cases[] = { | |
52 // UTF-8. | |
53 {L"Hello, world", "utf-8", "Hello, world"}, | |
54 {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"}, | |
55 // Non-BMP UTF-8. | |
56 {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"}, | |
57 // Big5 | |
58 {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"}, | |
59 // Unrepresentable character in the destination set. | |
60 {L"hello\x4f60\x06de\x597dworld", "big5", | |
61 "hello\xa7\x41%26%231758%3B\xa6\x6eworld"}, | |
62 }; | |
63 | |
64 for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) { | |
65 UConvScoper conv(icu_cases[i].encoding); | |
66 ASSERT_TRUE(conv.converter() != NULL); | |
67 ICUCharsetConverter converter(conv.converter()); | |
68 | |
69 std::string str; | |
70 StdStringCanonOutput output(&str); | |
71 | |
72 base::string16 input_str(WStringToUTF16(icu_cases[i].input)); | |
73 int input_len = static_cast<int>(input_str.length()); | |
74 converter.ConvertFromUTF16(input_str.c_str(), input_len, &output); | |
75 output.Complete(); | |
76 | |
77 EXPECT_STREQ(icu_cases[i].expected, str.c_str()); | |
78 } | |
79 | |
80 // Test string sizes around the resize boundary for the output to make sure | |
81 // the converter resizes as needed. | |
82 const int static_size = 16; | |
83 UConvScoper conv("utf-8"); | |
84 ASSERT_TRUE(conv.converter()); | |
85 ICUCharsetConverter converter(conv.converter()); | |
86 for (int i = static_size - 2; i <= static_size + 2; i++) { | |
87 // Make a string with the appropriate length. | |
88 base::string16 input; | |
89 for (int ch = 0; ch < i; ch++) | |
90 input.push_back('a'); | |
91 | |
92 RawCanonOutput<static_size> output; | |
93 converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()), | |
94 &output); | |
95 EXPECT_EQ(input.length(), static_cast<size_t>(output.length())); | |
96 } | |
97 } | |
98 | |
99 TEST(URLCanonIcuTest, QueryWithConverter) { | |
100 struct QueryCase { | |
101 const char* input8; | |
102 const wchar_t* input16; | |
103 const char* encoding; | |
104 const char* expected; | |
105 } query_cases[] = { | |
106 // Regular ASCII case in some different encodings. | |
107 {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"}, | |
108 {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"}, | |
109 {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"}, | |
110 // Chinese input/output | |
111 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312", | |
112 "?q=%C4%E3%BA%C3"}, | |
113 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"}, | |
114 // Unencodable character in the destination character set should be | |
115 // escaped. The escape sequence unescapes to be the entity name: | |
116 // "?q=你" | |
117 {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1", | |
118 "?q=Chinese%26%2365319%3B"}, | |
119 }; | |
120 | |
121 for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) { | |
122 Component out_comp; | |
123 | |
124 UConvScoper conv(query_cases[i].encoding); | |
125 ASSERT_TRUE(!query_cases[i].encoding || conv.converter()); | |
126 ICUCharsetConverter converter(conv.converter()); | |
127 | |
128 if (query_cases[i].input8) { | |
129 int len = static_cast<int>(strlen(query_cases[i].input8)); | |
130 Component in_comp(0, len); | |
131 std::string out_str; | |
132 | |
133 StdStringCanonOutput output(&out_str); | |
134 CanonicalizeQuery(query_cases[i].input8, in_comp, &converter, &output, | |
135 &out_comp); | |
136 output.Complete(); | |
137 | |
138 EXPECT_EQ(query_cases[i].expected, out_str); | |
139 } | |
140 | |
141 if (query_cases[i].input16) { | |
142 base::string16 input16(WStringToUTF16(query_cases[i].input16)); | |
143 int len = static_cast<int>(input16.length()); | |
144 Component in_comp(0, len); | |
145 std::string out_str; | |
146 | |
147 StdStringCanonOutput output(&out_str); | |
148 CanonicalizeQuery(input16.c_str(), in_comp, &converter, &output, | |
149 &out_comp); | |
150 output.Complete(); | |
151 | |
152 EXPECT_EQ(query_cases[i].expected, out_str); | |
153 } | |
154 } | |
155 | |
156 // Extra test for input with embedded NULL; | |
157 std::string out_str; | |
158 StdStringCanonOutput output(&out_str); | |
159 Component out_comp; | |
160 CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp); | |
161 output.Complete(); | |
162 EXPECT_EQ("?a%20%00z%01", out_str); | |
163 } | |
164 | |
165 } // namespace | |
166 | |
167 } // namespace url | |
OLD | NEW |