| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <errno.h> | 5 #include <errno.h> |
| 6 | 6 |
| 7 #include "base/macros.h" |
| 7 #include "testing/gtest/include/gtest/gtest.h" | 8 #include "testing/gtest/include/gtest/gtest.h" |
| 8 #include "third_party/icu/source/common/unicode/ucnv.h" | |
| 9 #include "url/url_canon.h" | 9 #include "url/url_canon.h" |
| 10 #include "url/url_canon_icu.h" | |
| 11 #include "url/url_canon_internal.h" | 10 #include "url/url_canon_internal.h" |
| 12 #include "url/url_canon_stdstring.h" | 11 #include "url/url_canon_stdstring.h" |
| 13 #include "url/url_parse.h" | 12 #include "url/url_parse.h" |
| 14 #include "url/url_test_utils.h" | 13 #include "url/url_test_utils.h" |
| 15 | 14 |
| 16 // Some implementations of base/basictypes.h may define ARRAYSIZE. | 15 // Some implementations of base/basictypes.h may define ARRAYSIZE. |
| 17 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro | 16 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro |
| 18 // which is in our version of basictypes.h. | 17 // which is in our version of basictypes.h. |
| 19 #ifndef ARRAYSIZE | 18 #ifndef ARRAYSIZE |
| 20 #define ARRAYSIZE ARRAYSIZE_UNSAFE | 19 #define ARRAYSIZE ARRAYSIZE_UNSAFE |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 77 const char* username; | 76 const char* username; |
| 78 const char* password; | 77 const char* password; |
| 79 const char* host; | 78 const char* host; |
| 80 const char* port; | 79 const char* port; |
| 81 const char* path; | 80 const char* path; |
| 82 const char* query; | 81 const char* query; |
| 83 const char* ref; | 82 const char* ref; |
| 84 const char* expected; | 83 const char* expected; |
| 85 }; | 84 }; |
| 86 | 85 |
| 87 // Wrapper around a UConverter object that manages creation and destruction. | |
| 88 class UConvScoper { | |
| 89 public: | |
| 90 explicit UConvScoper(const char* charset_name) { | |
| 91 UErrorCode err = U_ZERO_ERROR; | |
| 92 converter_ = ucnv_open(charset_name, &err); | |
| 93 } | |
| 94 | |
| 95 ~UConvScoper() { | |
| 96 if (converter_) | |
| 97 ucnv_close(converter_); | |
| 98 } | |
| 99 | |
| 100 // Returns the converter object, may be NULL. | |
| 101 UConverter* converter() const { return converter_; } | |
| 102 | |
| 103 private: | |
| 104 UConverter* converter_; | |
| 105 }; | |
| 106 | |
| 107 // Magic string used in the replacements code that tells SetupReplComp to | 86 // Magic string used in the replacements code that tells SetupReplComp to |
| 108 // call the clear function. | 87 // call the clear function. |
| 109 const char kDeleteComp[] = "|"; | 88 const char kDeleteComp[] = "|"; |
| 110 | 89 |
| 111 // Sets up a replacement for a single component. This is given pointers to | 90 // Sets up a replacement for a single component. This is given pointers to |
| 112 // the set and clear function for the component being replaced, and will | 91 // the set and clear function for the component being replaced, and will |
| 113 // either set the component (if it exists) or clear it (if the replacement | 92 // either set the component (if it exists) or clear it (if the replacement |
| 114 // string matches kDeleteComp). | 93 // string matches kDeleteComp). |
| 115 // | 94 // |
| 116 // This template is currently used only for the 8-bit case, and the strlen | 95 // This template is currently used only for the 8-bit case, and the strlen |
| (...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 237 std::string input8_str(utf_cases[i].input8); | 216 std::string input8_str(utf_cases[i].input8); |
| 238 base::string16 input16_str(WStringToUTF16(utf_cases[i].input16)); | 217 base::string16 input16_str(WStringToUTF16(utf_cases[i].input16)); |
| 239 EXPECT_EQ(input8_str, ConvertUTF16ToUTF8(input16_str)); | 218 EXPECT_EQ(input8_str, ConvertUTF16ToUTF8(input16_str)); |
| 240 | 219 |
| 241 // UTF-8 -> UTF-16 | 220 // UTF-8 -> UTF-16 |
| 242 EXPECT_EQ(input16_str, ConvertUTF8ToUTF16(input8_str)); | 221 EXPECT_EQ(input16_str, ConvertUTF8ToUTF16(input8_str)); |
| 243 } | 222 } |
| 244 } | 223 } |
| 245 } | 224 } |
| 246 | 225 |
| 247 TEST(URLCanonTest, ICUCharsetConverter) { | |
| 248 struct ICUCase { | |
| 249 const wchar_t* input; | |
| 250 const char* encoding; | |
| 251 const char* expected; | |
| 252 } icu_cases[] = { | |
| 253 // UTF-8. | |
| 254 {L"Hello, world", "utf-8", "Hello, world"}, | |
| 255 {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"}, | |
| 256 // Non-BMP UTF-8. | |
| 257 {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"}, | |
| 258 // Big5 | |
| 259 {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"}, | |
| 260 // Unrepresentable character in the destination set. | |
| 261 {L"hello\x4f60\x06de\x597dworld", "big5", "hello\xa7\x41%26%231758%3B\xa6\x6eworld"}, | |
| 262 }; | |
| 263 | |
| 264 for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) { | |
| 265 UConvScoper conv(icu_cases[i].encoding); | |
| 266 ASSERT_TRUE(conv.converter() != NULL); | |
| 267 ICUCharsetConverter converter(conv.converter()); | |
| 268 | |
| 269 std::string str; | |
| 270 StdStringCanonOutput output(&str); | |
| 271 | |
| 272 base::string16 input_str(WStringToUTF16(icu_cases[i].input)); | |
| 273 int input_len = static_cast<int>(input_str.length()); | |
| 274 converter.ConvertFromUTF16(input_str.c_str(), input_len, &output); | |
| 275 output.Complete(); | |
| 276 | |
| 277 EXPECT_STREQ(icu_cases[i].expected, str.c_str()); | |
| 278 } | |
| 279 | |
| 280 // Test string sizes around the resize boundary for the output to make sure | |
| 281 // the converter resizes as needed. | |
| 282 const int static_size = 16; | |
| 283 UConvScoper conv("utf-8"); | |
| 284 ASSERT_TRUE(conv.converter()); | |
| 285 ICUCharsetConverter converter(conv.converter()); | |
| 286 for (int i = static_size - 2; i <= static_size + 2; i++) { | |
| 287 // Make a string with the appropriate length. | |
| 288 base::string16 input; | |
| 289 for (int ch = 0; ch < i; ch++) | |
| 290 input.push_back('a'); | |
| 291 | |
| 292 RawCanonOutput<static_size> output; | |
| 293 converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()), | |
| 294 &output); | |
| 295 EXPECT_EQ(input.length(), static_cast<size_t>(output.length())); | |
| 296 } | |
| 297 } | |
| 298 | |
| 299 TEST(URLCanonTest, Scheme) { | 226 TEST(URLCanonTest, Scheme) { |
| 300 // Here, we're mostly testing that unusual characters are handled properly. | 227 // Here, we're mostly testing that unusual characters are handled properly. |
| 301 // The canonicalizer doesn't do any parsing or whitespace detection. It will | 228 // The canonicalizer doesn't do any parsing or whitespace detection. It will |
| 302 // also do its best on error, and will escape funny sequences (these won't be | 229 // also do its best on error, and will escape funny sequences (these won't be |
| 303 // valid schemes and it will return error). | 230 // valid schemes and it will return error). |
| 304 // | 231 // |
| 305 // Note that the canonicalizer will append a colon to the output to separate | 232 // Note that the canonicalizer will append a colon to the output to separate |
| 306 // out the rest of the URL, which is not present in the input. We check, | 233 // out the rest of the URL, which is not present in the input. We check, |
| 307 // however, that the output range includes everything but the colon. | 234 // however, that the output range includes everything but the colon. |
| 308 ComponentCase scheme_cases[] = { | 235 ComponentCase scheme_cases[] = { |
| (...skipping 882 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1191 bool success = CanonicalizePath(path_with_null, in_comp, &output, &out_comp); | 1118 bool success = CanonicalizePath(path_with_null, in_comp, &output, &out_comp); |
| 1192 output.Complete(); | 1119 output.Complete(); |
| 1193 EXPECT_FALSE(success); | 1120 EXPECT_FALSE(success); |
| 1194 EXPECT_EQ("/ab%00c", out_str); | 1121 EXPECT_EQ("/ab%00c", out_str); |
| 1195 } | 1122 } |
| 1196 | 1123 |
| 1197 TEST(URLCanonTest, Query) { | 1124 TEST(URLCanonTest, Query) { |
| 1198 struct QueryCase { | 1125 struct QueryCase { |
| 1199 const char* input8; | 1126 const char* input8; |
| 1200 const wchar_t* input16; | 1127 const wchar_t* input16; |
| 1201 const char* encoding; | |
| 1202 const char* expected; | 1128 const char* expected; |
| 1203 } query_cases[] = { | 1129 } query_cases[] = { |
| 1204 // Regular ASCII case in some different encodings. | 1130 // Regular ASCII case. |
| 1205 {"foo=bar", L"foo=bar", NULL, "?foo=bar"}, | 1131 {"foo=bar", L"foo=bar", "?foo=bar"}, |
| 1206 {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"}, | |
| 1207 {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"}, | |
| 1208 {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"}, | |
| 1209 // Allow question marks in the query without escaping | 1132 // Allow question marks in the query without escaping |
| 1210 {"as?df", L"as?df", NULL, "?as?df"}, | 1133 {"as?df", L"as?df", "?as?df"}, |
| 1211 // Always escape '#' since it would mark the ref. | 1134 // Always escape '#' since it would mark the ref. |
| 1212 {"as#df", L"as#df", NULL, "?as%23df"}, | 1135 {"as#df", L"as#df", "?as%23df"}, |
| 1213 // Escape some questionable 8-bit characters, but never unescape. | 1136 // Escape some questionable 8-bit characters, but never unescape. |
| 1214 {"\x02hello\x7f bye", L"\x02hello\x7f bye", NULL, "?%02hello%7F%20bye"}, | 1137 {"\x02hello\x7f bye", L"\x02hello\x7f bye", "?%02hello%7F%20bye"}, |
| 1215 {"%40%41123", L"%40%41123", NULL, "?%40%41123"}, | 1138 {"%40%41123", L"%40%41123", "?%40%41123"}, |
| 1216 // Chinese input/output | 1139 // Chinese input/output |
| 1217 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", NULL, "?q=%E4%BD%A0%E5%A5%BD"}, | 1140 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "?q=%E4%BD%A0%E5%A5%BD"}, |
| 1218 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312", "?q=%C4%E3%BA%C3"}, | |
| 1219 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"}, | |
| 1220 // Unencodable character in the destination character set should be | |
| 1221 // escaped. The escape sequence unescapes to be the entity name: | |
| 1222 // "?q=&#20320;" | |
| 1223 {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1", "?q=Chinese%26%2365319%3B"}, | |
| 1224 // Invalid UTF-8/16 input should be replaced with invalid characters. | 1141 // Invalid UTF-8/16 input should be replaced with invalid characters. |
| 1225 {"q=\xed\xed", L"q=\xd800\xd800", NULL, "?q=%EF%BF%BD%EF%BF%BD"}, | 1142 {"q=\xed\xed", L"q=\xd800\xd800", "?q=%EF%BF%BD%EF%BF%BD"}, |
| 1226 // Don't allow < or > because sometimes they are used for XSS if the | 1143 // Don't allow < or > because sometimes they are used for XSS if the |
| 1227 // URL is echoed in content. Firefox does this, IE doesn't. | 1144 // URL is echoed in content. Firefox does this, IE doesn't. |
| 1228 {"q=<asdf>", L"q=<asdf>", NULL, "?q=%3Casdf%3E"}, | 1145 {"q=<asdf>", L"q=<asdf>", "?q=%3Casdf%3E"}, |
| 1229 // Escape double quotemarks in the query. | 1146 // Escape double quotemarks in the query. |
| 1230 {"q=\"asdf\"", L"q=\"asdf\"", NULL, "?q=%22asdf%22"}, | 1147 {"q=\"asdf\"", L"q=\"asdf\"", "?q=%22asdf%22"}, |
| 1231 }; | 1148 }; |
| 1232 | 1149 |
| 1233 for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) { | 1150 for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) { |
| 1234 Component out_comp; | 1151 Component out_comp; |
| 1235 | 1152 |
| 1236 UConvScoper conv(query_cases[i].encoding); | |
| 1237 ASSERT_TRUE(!query_cases[i].encoding || conv.converter()); | |
| 1238 ICUCharsetConverter converter(conv.converter()); | |
| 1239 | |
| 1240 // Map NULL to a NULL converter pointer. | |
| 1241 ICUCharsetConverter* conv_pointer = &converter; | |
| 1242 if (!query_cases[i].encoding) | |
| 1243 conv_pointer = NULL; | |
| 1244 | |
| 1245 if (query_cases[i].input8) { | 1153 if (query_cases[i].input8) { |
| 1246 int len = static_cast<int>(strlen(query_cases[i].input8)); | 1154 int len = static_cast<int>(strlen(query_cases[i].input8)); |
| 1247 Component in_comp(0, len); | 1155 Component in_comp(0, len); |
| 1248 std::string out_str; | 1156 std::string out_str; |
| 1249 | 1157 |
| 1250 StdStringCanonOutput output(&out_str); | 1158 StdStringCanonOutput output(&out_str); |
| 1251 CanonicalizeQuery(query_cases[i].input8, in_comp, conv_pointer, &output, | 1159 CanonicalizeQuery(query_cases[i].input8, in_comp, NULL, &output, |
| 1252 &out_comp); | 1160 &out_comp); |
| 1253 output.Complete(); | 1161 output.Complete(); |
| 1254 | 1162 |
| 1255 EXPECT_EQ(query_cases[i].expected, out_str); | 1163 EXPECT_EQ(query_cases[i].expected, out_str); |
| 1256 } | 1164 } |
| 1257 | 1165 |
| 1258 if (query_cases[i].input16) { | 1166 if (query_cases[i].input16) { |
| 1259 base::string16 input16(WStringToUTF16(query_cases[i].input16)); | 1167 base::string16 input16(WStringToUTF16(query_cases[i].input16)); |
| 1260 int len = static_cast<int>(input16.length()); | 1168 int len = static_cast<int>(input16.length()); |
| 1261 Component in_comp(0, len); | 1169 Component in_comp(0, len); |
| 1262 std::string out_str; | 1170 std::string out_str; |
| 1263 | 1171 |
| 1264 StdStringCanonOutput output(&out_str); | 1172 StdStringCanonOutput output(&out_str); |
| 1265 CanonicalizeQuery(input16.c_str(), in_comp, conv_pointer, &output, | 1173 CanonicalizeQuery(input16.c_str(), in_comp, NULL, &output, &out_comp); |
| 1266 &out_comp); | |
| 1267 output.Complete(); | 1174 output.Complete(); |
| 1268 | 1175 |
| 1269 EXPECT_EQ(query_cases[i].expected, out_str); | 1176 EXPECT_EQ(query_cases[i].expected, out_str); |
| 1270 } | 1177 } |
| 1271 } | 1178 } |
| 1272 | 1179 |
| 1273 // Extra test for input with embedded NULL; | 1180 // Extra test for input with embedded NULL; |
| 1274 std::string out_str; | 1181 std::string out_str; |
| 1275 StdStringCanonOutput output(&out_str); | 1182 StdStringCanonOutput output(&out_str); |
| 1276 Component out_comp; | 1183 Component out_comp; |
| (...skipping 940 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2217 repl_output.Complete(); | 2124 repl_output.Complete(); |
| 2218 | 2125 |
| 2219 // Generate the expected string and check. | 2126 // Generate the expected string and check. |
| 2220 std::string expected("file:///foo?"); | 2127 std::string expected("file:///foo?"); |
| 2221 for (size_t i = 0; i < new_query.length(); i++) | 2128 for (size_t i = 0; i < new_query.length(); i++) |
| 2222 expected.push_back('a'); | 2129 expected.push_back('a'); |
| 2223 EXPECT_TRUE(expected == repl_str); | 2130 EXPECT_TRUE(expected == repl_str); |
| 2224 } | 2131 } |
| 2225 | 2132 |
| 2226 } // namespace url | 2133 } // namespace url |
| OLD | NEW |