url/url_canon_unittest.cc - Issue 257673002: Make it possible to build url/ without ICU on android.

Side by Side Diff: url/url_canon_unittest.cc

Issue 257673002: Make it possible to build url/ without ICU on android. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fix GN/GYP Created 6 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <errno.h>	5 #include <errno.h>

6	6

	7 #include "base/macros.h"

7 #include "testing/gtest/include/gtest/gtest.h"	8 #include "testing/gtest/include/gtest/gtest.h"

8 #include "third_party/icu/source/common/unicode/ucnv.h"

9 #include "url/url_canon.h"	9 #include "url/url_canon.h"

10 #include "url/url_canon_icu.h"

11 #include "url/url_canon_internal.h"	10 #include "url/url_canon_internal.h"

12 #include "url/url_canon_stdstring.h"	11 #include "url/url_canon_stdstring.h"

13 #include "url/url_parse.h"	12 #include "url/url_parse.h"

14 #include "url/url_test_utils.h"	13 #include "url/url_test_utils.h"

15	14

16 // Some implementations of base/basictypes.h may define ARRAYSIZE.	15 // Some implementations of base/basictypes.h may define ARRAYSIZE.

17 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro	16 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro

18 // which is in our version of basictypes.h.	17 // which is in our version of basictypes.h.

19 #ifndef ARRAYSIZE	18 #ifndef ARRAYSIZE

20 #define ARRAYSIZE ARRAYSIZE_UNSAFE	19 #define ARRAYSIZE ARRAYSIZE_UNSAFE

(...skipping 56 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
77 const char* username;	76 const char* username;

78 const char* password;	77 const char* password;

79 const char* host;	78 const char* host;

80 const char* port;	79 const char* port;

81 const char* path;	80 const char* path;

82 const char* query;	81 const char* query;

83 const char* ref;	82 const char* ref;

84 const char* expected;	83 const char* expected;

85 };	84 };

86	85

87 // Wrapper around a UConverter object that managers creation and destruction.

88 class UConvScoper {

89 public:

90 explicit UConvScoper(const char* charset_name) {

91 UErrorCode err = U_ZERO_ERROR;

92 converter_ = ucnv_open(charset_name, &err);

93 }

94

95 ~UConvScoper() {

96 if (converter_)

97 ucnv_close(converter_);

98 }

99

100 // Returns the converter object, may be NULL.

101 UConverter* converter() const { return converter_; }

102

103 private:

104 UConverter* converter_;

105 };

106

107 // Magic string used in the replacements code that tells SetupReplComp to	86 // Magic string used in the replacements code that tells SetupReplComp to

108 // call the clear function.	87 // call the clear function.

109 const char kDeleteComp[] = "|";	88 const char kDeleteComp[] = "|";

110	89

111 // Sets up a replacement for a single component. This is given pointers to	90 // Sets up a replacement for a single component. This is given pointers to

112 // the set and clear function for the component being replaced, and will	91 // the set and clear function for the component being replaced, and will

113 // either set the component (if it exists) or clear it (if the replacement	92 // either set the component (if it exists) or clear it (if the replacement

114 // string matches kDeleteComp).	93 // string matches kDeleteComp).

115 //	94 //

116 // This template is currently used only for the 8-bit case, and the strlen	95 // This template is currently used only for the 8-bit case, and the strlen

(...skipping 120 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
237 std::string input8_str(utf_cases[i].input8);	216 std::string input8_str(utf_cases[i].input8);

238 base::string16 input16_str(WStringToUTF16(utf_cases[i].input16));	217 base::string16 input16_str(WStringToUTF16(utf_cases[i].input16));

239 EXPECT_EQ(input8_str, ConvertUTF16ToUTF8(input16_str));	218 EXPECT_EQ(input8_str, ConvertUTF16ToUTF8(input16_str));

240	219

241 // UTF-8 -> UTF-16	220 // UTF-8 -> UTF-16

242 EXPECT_EQ(input16_str, ConvertUTF8ToUTF16(input8_str));	221 EXPECT_EQ(input16_str, ConvertUTF8ToUTF16(input8_str));

243 }	222 }

244 }	223 }

245 }	224 }

246	225

247 TEST(URLCanonTest, ICUCharsetConverter) {

248 struct ICUCase {

249 const wchar_t* input;

250 const char* encoding;

251 const char* expected;

252 } icu_cases[] = {

253 // UTF-8.

254 {L"Hello, world", "utf-8", "Hello, world"},

255 {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},

256 // Non-BMP UTF-8.

257 {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},

258 // Big5

259 {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},

260 // Unrepresentable character in the destination set.

261 {L"hello\x4f60\x06de\x597dworld", "big5", "hello\xa7\x41%26%231758%3B\xa6\x6eworld"},

262 };

263

264 for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) {

265 UConvScoper conv(icu_cases[i].encoding);

266 ASSERT_TRUE(conv.converter() != NULL);

267 ICUCharsetConverter converter(conv.converter());

268

269 std::string str;

270 StdStringCanonOutput output(&str);

271

272 base::string16 input_str(WStringToUTF16(icu_cases[i].input));

273 int input_len = static_cast<int>(input_str.length());

274 converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);

275 output.Complete();

276

277 EXPECT_STREQ(icu_cases[i].expected, str.c_str());

278 }

279

280 // Test string sizes around the resize boundary for the output to make sure

281 // the converter resizes as needed.

282 const int static_size = 16;

283 UConvScoper conv("utf-8");

284 ASSERT_TRUE(conv.converter());

285 ICUCharsetConverter converter(conv.converter());

286 for (int i = static_size - 2; i <= static_size + 2; i++) {

287 // Make a string with the appropriate length.

288 base::string16 input;

289 for (int ch = 0; ch < i; ch++)

290 input.push_back('a');

291

292 RawCanonOutput<static_size> output;

293 converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()),

294 &output);

295 EXPECT_EQ(input.length(), static_cast<size_t>(output.length()));

296 }

297 }

298

299 TEST(URLCanonTest, Scheme) {	226 TEST(URLCanonTest, Scheme) {

300 // Here, we're mostly testing that unusual characters are handled properly.	227 // Here, we're mostly testing that unusual characters are handled properly.

301 // The canonicalizer doesn't do any parsing or whitespace detection. It will	228 // The canonicalizer doesn't do any parsing or whitespace detection. It will

302 // also do its best on error, and will escape funny sequences (these won't be	229 // also do its best on error, and will escape funny sequences (these won't be

303 // valid schemes and it will return error).	230 // valid schemes and it will return error).

304 //	231 //

305 // Note that the canonicalizer will append a colon to the output to separate	232 // Note that the canonicalizer will append a colon to the output to separate

306 // out the rest of the URL, which is not present in the input. We check,	233 // out the rest of the URL, which is not present in the input. We check,

307 // however, that the output range includes everything but the colon.	234 // however, that the output range includes everything but the colon.

308 ComponentCase scheme_cases[] = {	235 ComponentCase scheme_cases[] = {

(...skipping 882 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1191 bool success = CanonicalizePath(path_with_null, in_comp, &output, &out_comp);	1118 bool success = CanonicalizePath(path_with_null, in_comp, &output, &out_comp);

1192 output.Complete();	1119 output.Complete();

1193 EXPECT_FALSE(success);	1120 EXPECT_FALSE(success);

1194 EXPECT_EQ("/ab%00c", out_str);	1121 EXPECT_EQ("/ab%00c", out_str);

1195 }	1122 }

1196	1123

1197 TEST(URLCanonTest, Query) {	1124 TEST(URLCanonTest, Query) {

1198 struct QueryCase {	1125 struct QueryCase {

1199 const char* input8;	1126 const char* input8;

1200 const wchar_t* input16;	1127 const wchar_t* input16;

1201 const char* encoding;

1202 const char* expected;	1128 const char* expected;

1203 } query_cases[] = {	1129 } query_cases[] = {

1204 // Regular ASCII case in some different encodings.	1130 // Regular ASCII case.

1205 {"foo=bar", L"foo=bar", NULL, "?foo=bar"},	1131 {"foo=bar", L"foo=bar", "?foo=bar"},

1206 {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"},

1207 {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"},

1208 {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"},

1209 // Allow question marks in the query without escaping	1132 // Allow question marks in the query without escaping

1210 {"as?df", L"as?df", NULL, "?as?df"},	1133 {"as?df", L"as?df", "?as?df"},

1211 // Always escape '#' since it would mark the ref.	1134 // Always escape '#' since it would mark the ref.

1212 {"as#df", L"as#df", NULL, "?as%23df"},	1135 {"as#df", L"as#df", "?as%23df"},

1213 // Escape some questionable 8-bit characters, but never unescape.	1136 // Escape some questionable 8-bit characters, but never unescape.

1214 {"\x02hello\x7f bye", L"\x02hello\x7f bye", NULL, "?%02hello%7F%20bye"},	1137 {"\x02hello\x7f bye", L"\x02hello\x7f bye", "?%02hello%7F%20bye"},

1215 {"%40%41123", L"%40%41123", NULL, "?%40%41123"},	1138 {"%40%41123", L"%40%41123", "?%40%41123"},

1216 // Chinese input/output	1139 // Chinese input/output

1217 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", NULL, "?q=%E4%BD%A0%E5%A5%BD"},	1140 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "?q=%E4%BD%A0%E5%A5%BD"},

1218 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312", "?q=%C4%E3%BA%C3"},

1219 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"},

1220 // Unencodable character in the destination character set should be

1221 // escaped. The escape sequence unescapes to be the entity name:

1222 // "?q=&#20320;"

1223 {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1", "?q=Chinese%26%2365319%3B"},

1224 // Invalid UTF-8/16 input should be replaced with invalid characters.	1141 // Invalid UTF-8/16 input should be replaced with invalid characters.

1225 {"q=\xed\xed", L"q=\xd800\xd800", NULL, "?q=%EF%BF%BD%EF%BF%BD"},	1142 {"q=\xed\xed", L"q=\xd800\xd800", "?q=%EF%BF%BD%EF%BF%BD"},

1226 // Don't allow < or > because sometimes they are used for XSS if the	1143 // Don't allow < or > because sometimes they are used for XSS if the

1227 // URL is echoed in content. Firefox does this, IE doesn't.	1144 // URL is echoed in content. Firefox does this, IE doesn't.

1228 {"q=<asdf>", L"q=<asdf>", NULL, "?q=%3Casdf%3E"},	1145 {"q=<asdf>", L"q=<asdf>", "?q=%3Casdf%3E"},

1229 // Escape double quotemarks in the query.	1146 // Escape double quotemarks in the query.

1230 {"q=\"asdf\"", L"q=\"asdf\"", NULL, "?q=%22asdf%22"},	1147 {"q=\"asdf\"", L"q=\"asdf\"", "?q=%22asdf%22"},

1231 };	1148 };

1232	1149

1233 for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) {	1150 for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) {

1234 Component out_comp;	1151 Component out_comp;

1235	1152

1236 UConvScoper conv(query_cases[i].encoding);

1237 ASSERT_TRUE(!query_cases[i].encoding || conv.converter());

1238 ICUCharsetConverter converter(conv.converter());

1239

1240 // Map NULL to a NULL converter pointer.

1241 ICUCharsetConverter* conv_pointer = &converter;

1242 if (!query_cases[i].encoding)

1243 conv_pointer = NULL;

1244

1245 if (query_cases[i].input8) {	1153 if (query_cases[i].input8) {

1246 int len = static_cast<int>(strlen(query_cases[i].input8));	1154 int len = static_cast<int>(strlen(query_cases[i].input8));

1247 Component in_comp(0, len);	1155 Component in_comp(0, len);

1248 std::string out_str;	1156 std::string out_str;

1249	1157

1250 StdStringCanonOutput output(&out_str);	1158 StdStringCanonOutput output(&out_str);

1251 CanonicalizeQuery(query_cases[i].input8, in_comp, conv_pointer, &output,	1159 CanonicalizeQuery(query_cases[i].input8, in_comp, NULL, &output,

1252 &out_comp);	1160 &out_comp);

1253 output.Complete();	1161 output.Complete();

1254	1162

1255 EXPECT_EQ(query_cases[i].expected, out_str);	1163 EXPECT_EQ(query_cases[i].expected, out_str);

1256 }	1164 }

1257	1165

1258 if (query_cases[i].input16) {	1166 if (query_cases[i].input16) {

1259 base::string16 input16(WStringToUTF16(query_cases[i].input16));	1167 base::string16 input16(WStringToUTF16(query_cases[i].input16));

1260 int len = static_cast<int>(input16.length());	1168 int len = static_cast<int>(input16.length());

1261 Component in_comp(0, len);	1169 Component in_comp(0, len);

1262 std::string out_str;	1170 std::string out_str;

1263	1171

1264 StdStringCanonOutput output(&out_str);	1172 StdStringCanonOutput output(&out_str);

1265 CanonicalizeQuery(input16.c_str(), in_comp, conv_pointer, &output,	1173 CanonicalizeQuery(input16.c_str(), in_comp, NULL, &output, &out_comp);

1266 &out_comp);

1267 output.Complete();	1174 output.Complete();

1268	1175

1269 EXPECT_EQ(query_cases[i].expected, out_str);	1176 EXPECT_EQ(query_cases[i].expected, out_str);

1270 }	1177 }

1271 }	1178 }

1272	1179

1273 // Extra test for input with embedded NULL;	1180 // Extra test for input with embedded NULL;

1274 std::string out_str;	1181 std::string out_str;

1275 StdStringCanonOutput output(&out_str);	1182 StdStringCanonOutput output(&out_str);

1276 Component out_comp;	1183 Component out_comp;

(...skipping 940 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2217 repl_output.Complete();	2124 repl_output.Complete();

2218	2125

2219 // Generate the expected string and check.	2126 // Generate the expected string and check.

2220 std::string expected("file:///foo?");	2127 std::string expected("file:///foo?");

2221 for (size_t i = 0; i < new_query.length(); i++)	2128 for (size_t i = 0; i < new_query.length(); i++)

2222 expected.push_back('a');	2129 expected.push_back('a');

2223 EXPECT_TRUE(expected == repl_str);	2130 EXPECT_TRUE(expected == repl_str);

2224 }	2131 }

2225	2132

2226 } // namespace url	2133 } // namespace url

OLD	NEW

« url/url_canon_icu_unittest.cc ('K') | « url/url_canon_icu_unittest.cc ('k') | url/url_srcs.gypi » ('j') | no next file with comments »