Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(312)

Side by Side Diff: url/url_canon_unittest.cc

Issue 257673002: Make it possible to build url/ without ICU on android. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix GN/GYP Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <errno.h> 5 #include <errno.h>
6 6
7 #include "base/macros.h"
7 #include "testing/gtest/include/gtest/gtest.h" 8 #include "testing/gtest/include/gtest/gtest.h"
8 #include "third_party/icu/source/common/unicode/ucnv.h"
9 #include "url/url_canon.h" 9 #include "url/url_canon.h"
10 #include "url/url_canon_icu.h"
11 #include "url/url_canon_internal.h" 10 #include "url/url_canon_internal.h"
12 #include "url/url_canon_stdstring.h" 11 #include "url/url_canon_stdstring.h"
13 #include "url/url_parse.h" 12 #include "url/url_parse.h"
14 #include "url/url_test_utils.h" 13 #include "url/url_test_utils.h"
15 14
16 // Some implementations of base/basictypes.h may define ARRAYSIZE. 15 // Some implementations of base/basictypes.h may define ARRAYSIZE.
17 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro 16 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro
18 // which is in our version of basictypes.h. 17 // which is in our version of basictypes.h.
19 #ifndef ARRAYSIZE 18 #ifndef ARRAYSIZE
20 #define ARRAYSIZE ARRAYSIZE_UNSAFE 19 #define ARRAYSIZE ARRAYSIZE_UNSAFE
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 const char* username; 76 const char* username;
78 const char* password; 77 const char* password;
79 const char* host; 78 const char* host;
80 const char* port; 79 const char* port;
81 const char* path; 80 const char* path;
82 const char* query; 81 const char* query;
83 const char* ref; 82 const char* ref;
84 const char* expected; 83 const char* expected;
85 }; 84 };
86 85
87 // Wrapper around a UConverter object that manages creation and destruction.
88 class UConvScoper {
89 public:
90 explicit UConvScoper(const char* charset_name) {
91 UErrorCode err = U_ZERO_ERROR;
92 converter_ = ucnv_open(charset_name, &err);
93 }
94
95 ~UConvScoper() {
96 if (converter_)
97 ucnv_close(converter_);
98 }
99
100 // Returns the converter object, may be NULL.
101 UConverter* converter() const { return converter_; }
102
103 private:
104 UConverter* converter_;
105 };
106
107 // Magic string used in the replacements code that tells SetupReplComp to 86 // Magic string used in the replacements code that tells SetupReplComp to
108 // call the clear function. 87 // call the clear function.
109 const char kDeleteComp[] = "|"; 88 const char kDeleteComp[] = "|";
110 89
111 // Sets up a replacement for a single component. This is given pointers to 90 // Sets up a replacement for a single component. This is given pointers to
112 // the set and clear function for the component being replaced, and will 91 // the set and clear function for the component being replaced, and will
113 // either set the component (if it exists) or clear it (if the replacement 92 // either set the component (if it exists) or clear it (if the replacement
114 // string matches kDeleteComp). 93 // string matches kDeleteComp).
115 // 94 //
116 // This template is currently used only for the 8-bit case, and the strlen 95 // This template is currently used only for the 8-bit case, and the strlen
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
237 std::string input8_str(utf_cases[i].input8); 216 std::string input8_str(utf_cases[i].input8);
238 base::string16 input16_str(WStringToUTF16(utf_cases[i].input16)); 217 base::string16 input16_str(WStringToUTF16(utf_cases[i].input16));
239 EXPECT_EQ(input8_str, ConvertUTF16ToUTF8(input16_str)); 218 EXPECT_EQ(input8_str, ConvertUTF16ToUTF8(input16_str));
240 219
241 // UTF-8 -> UTF-16 220 // UTF-8 -> UTF-16
242 EXPECT_EQ(input16_str, ConvertUTF8ToUTF16(input8_str)); 221 EXPECT_EQ(input16_str, ConvertUTF8ToUTF16(input8_str));
243 } 222 }
244 } 223 }
245 } 224 }
246 225
247 TEST(URLCanonTest, ICUCharsetConverter) {
248 struct ICUCase {
249 const wchar_t* input;
250 const char* encoding;
251 const char* expected;
252 } icu_cases[] = {
253 // UTF-8.
254 {L"Hello, world", "utf-8", "Hello, world"},
255 {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},
256 // Non-BMP UTF-8.
257 {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},
258 // Big5
259 {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},
260 // Unrepresentable character in the destination set.
261 {L"hello\x4f60\x06de\x597dworld", "big5", "hello\xa7\x41%26%231758%3B\xa6\x6eworld"},
262 };
263
264 for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) {
265 UConvScoper conv(icu_cases[i].encoding);
266 ASSERT_TRUE(conv.converter() != NULL);
267 ICUCharsetConverter converter(conv.converter());
268
269 std::string str;
270 StdStringCanonOutput output(&str);
271
272 base::string16 input_str(WStringToUTF16(icu_cases[i].input));
273 int input_len = static_cast<int>(input_str.length());
274 converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);
275 output.Complete();
276
277 EXPECT_STREQ(icu_cases[i].expected, str.c_str());
278 }
279
280 // Test string sizes around the resize boundary for the output to make sure
281 // the converter resizes as needed.
282 const int static_size = 16;
283 UConvScoper conv("utf-8");
284 ASSERT_TRUE(conv.converter());
285 ICUCharsetConverter converter(conv.converter());
286 for (int i = static_size - 2; i <= static_size + 2; i++) {
287 // Make a string with the appropriate length.
288 base::string16 input;
289 for (int ch = 0; ch < i; ch++)
290 input.push_back('a');
291
292 RawCanonOutput<static_size> output;
293 converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()),
294 &output);
295 EXPECT_EQ(input.length(), static_cast<size_t>(output.length()));
296 }
297 }
298
299 TEST(URLCanonTest, Scheme) { 226 TEST(URLCanonTest, Scheme) {
300 // Here, we're mostly testing that unusual characters are handled properly. 227 // Here, we're mostly testing that unusual characters are handled properly.
301 // The canonicalizer doesn't do any parsing or whitespace detection. It will 228 // The canonicalizer doesn't do any parsing or whitespace detection. It will
302 // also do its best on error, and will escape funny sequences (these won't be 229 // also do its best on error, and will escape funny sequences (these won't be
303 // valid schemes and it will return error). 230 // valid schemes and it will return error).
304 // 231 //
305 // Note that the canonicalizer will append a colon to the output to separate 232 // Note that the canonicalizer will append a colon to the output to separate
306 // out the rest of the URL, which is not present in the input. We check, 233 // out the rest of the URL, which is not present in the input. We check,
307 // however, that the output range includes everything but the colon. 234 // however, that the output range includes everything but the colon.
308 ComponentCase scheme_cases[] = { 235 ComponentCase scheme_cases[] = {
(...skipping 882 matching lines...) Expand 10 before | Expand all | Expand 10 after
1191 bool success = CanonicalizePath(path_with_null, in_comp, &output, &out_comp); 1118 bool success = CanonicalizePath(path_with_null, in_comp, &output, &out_comp);
1192 output.Complete(); 1119 output.Complete();
1193 EXPECT_FALSE(success); 1120 EXPECT_FALSE(success);
1194 EXPECT_EQ("/ab%00c", out_str); 1121 EXPECT_EQ("/ab%00c", out_str);
1195 } 1122 }
1196 1123
1197 TEST(URLCanonTest, Query) { 1124 TEST(URLCanonTest, Query) {
1198 struct QueryCase { 1125 struct QueryCase {
1199 const char* input8; 1126 const char* input8;
1200 const wchar_t* input16; 1127 const wchar_t* input16;
1201 const char* encoding;
1202 const char* expected; 1128 const char* expected;
1203 } query_cases[] = { 1129 } query_cases[] = {
1204 // Regular ASCII case in some different encodings. 1130 // Regular ASCII case.
1205 {"foo=bar", L"foo=bar", NULL, "?foo=bar"}, 1131 {"foo=bar", L"foo=bar", "?foo=bar"},
1206 {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"},
1207 {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"},
1208 {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"},
1209 // Allow question marks in the query without escaping 1132 // Allow question marks in the query without escaping
1210 {"as?df", L"as?df", NULL, "?as?df"}, 1133 {"as?df", L"as?df", "?as?df"},
1211 // Always escape '#' since it would mark the ref. 1134 // Always escape '#' since it would mark the ref.
1212 {"as#df", L"as#df", NULL, "?as%23df"}, 1135 {"as#df", L"as#df", "?as%23df"},
1213 // Escape some questionable 8-bit characters, but never unescape. 1136 // Escape some questionable 8-bit characters, but never unescape.
1214 {"\x02hello\x7f bye", L"\x02hello\x7f bye", NULL, "?%02hello%7F%20bye"}, 1137 {"\x02hello\x7f bye", L"\x02hello\x7f bye", "?%02hello%7F%20bye"},
1215 {"%40%41123", L"%40%41123", NULL, "?%40%41123"}, 1138 {"%40%41123", L"%40%41123", "?%40%41123"},
1216 // Chinese input/output 1139 // Chinese input/output
1217 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", NULL, "?q=%E4%BD%A0%E5%A5%BD"}, 1140 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "?q=%E4%BD%A0%E5%A5%BD"},
1218 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312", "?q=%C4%E3%BA%C3"},
1219 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"},
1220 // Unencodable character in the destination character set should be
1221 // escaped. The escape sequence unescapes to be the entity name:
1222 // "?q=Chinese&#65319;"
1223 {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1", "?q=Chinese%26%2365319%3B"},
1224 // Invalid UTF-8/16 input should be replaced with invalid characters. 1141 // Invalid UTF-8/16 input should be replaced with invalid characters.
1225 {"q=\xed\xed", L"q=\xd800\xd800", NULL, "?q=%EF%BF%BD%EF%BF%BD"}, 1142 {"q=\xed\xed", L"q=\xd800\xd800", "?q=%EF%BF%BD%EF%BF%BD"},
1226 // Don't allow < or > because sometimes they are used for XSS if the 1143 // Don't allow < or > because sometimes they are used for XSS if the
1227 // URL is echoed in content. Firefox does this, IE doesn't. 1144 // URL is echoed in content. Firefox does this, IE doesn't.
1228 {"q=<asdf>", L"q=<asdf>", NULL, "?q=%3Casdf%3E"}, 1145 {"q=<asdf>", L"q=<asdf>", "?q=%3Casdf%3E"},
1229 // Escape double quotemarks in the query. 1146 // Escape double quotemarks in the query.
1230 {"q=\"asdf\"", L"q=\"asdf\"", NULL, "?q=%22asdf%22"}, 1147 {"q=\"asdf\"", L"q=\"asdf\"", "?q=%22asdf%22"},
1231 }; 1148 };
1232 1149
1233 for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) { 1150 for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) {
1234 Component out_comp; 1151 Component out_comp;
1235 1152
1236 UConvScoper conv(query_cases[i].encoding);
1237 ASSERT_TRUE(!query_cases[i].encoding || conv.converter());
1238 ICUCharsetConverter converter(conv.converter());
1239
1240 // Map NULL to a NULL converter pointer.
1241 ICUCharsetConverter* conv_pointer = &converter;
1242 if (!query_cases[i].encoding)
1243 conv_pointer = NULL;
1244
1245 if (query_cases[i].input8) { 1153 if (query_cases[i].input8) {
1246 int len = static_cast<int>(strlen(query_cases[i].input8)); 1154 int len = static_cast<int>(strlen(query_cases[i].input8));
1247 Component in_comp(0, len); 1155 Component in_comp(0, len);
1248 std::string out_str; 1156 std::string out_str;
1249 1157
1250 StdStringCanonOutput output(&out_str); 1158 StdStringCanonOutput output(&out_str);
1251 CanonicalizeQuery(query_cases[i].input8, in_comp, conv_pointer, &output, 1159 CanonicalizeQuery(query_cases[i].input8, in_comp, NULL, &output,
1252 &out_comp); 1160 &out_comp);
1253 output.Complete(); 1161 output.Complete();
1254 1162
1255 EXPECT_EQ(query_cases[i].expected, out_str); 1163 EXPECT_EQ(query_cases[i].expected, out_str);
1256 } 1164 }
1257 1165
1258 if (query_cases[i].input16) { 1166 if (query_cases[i].input16) {
1259 base::string16 input16(WStringToUTF16(query_cases[i].input16)); 1167 base::string16 input16(WStringToUTF16(query_cases[i].input16));
1260 int len = static_cast<int>(input16.length()); 1168 int len = static_cast<int>(input16.length());
1261 Component in_comp(0, len); 1169 Component in_comp(0, len);
1262 std::string out_str; 1170 std::string out_str;
1263 1171
1264 StdStringCanonOutput output(&out_str); 1172 StdStringCanonOutput output(&out_str);
1265 CanonicalizeQuery(input16.c_str(), in_comp, conv_pointer, &output, 1173 CanonicalizeQuery(input16.c_str(), in_comp, NULL, &output, &out_comp);
1266 &out_comp);
1267 output.Complete(); 1174 output.Complete();
1268 1175
1269 EXPECT_EQ(query_cases[i].expected, out_str); 1176 EXPECT_EQ(query_cases[i].expected, out_str);
1270 } 1177 }
1271 } 1178 }
1272 1179
1273 // Extra test for input with embedded NULL; 1180 // Extra test for input with embedded NULL;
1274 std::string out_str; 1181 std::string out_str;
1275 StdStringCanonOutput output(&out_str); 1182 StdStringCanonOutput output(&out_str);
1276 Component out_comp; 1183 Component out_comp;
(...skipping 940 matching lines...) Expand 10 before | Expand all | Expand 10 after
2217 repl_output.Complete(); 2124 repl_output.Complete();
2218 2125
2219 // Generate the expected string and check. 2126 // Generate the expected string and check.
2220 std::string expected("file:///foo?"); 2127 std::string expected("file:///foo?");
2221 for (size_t i = 0; i < new_query.length(); i++) 2128 for (size_t i = 0; i < new_query.length(); i++)
2222 expected.push_back('a'); 2129 expected.push_back('a');
2223 EXPECT_TRUE(expected == repl_str); 2130 EXPECT_TRUE(expected == repl_str);
2224 } 2131 }
2225 2132
2226 } // namespace url 2133 } // namespace url
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698