| Index: url/url_canon_unittest.cc
|
| diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
|
| index 55f9608006c8577e78a1443a773975af5289c546..9a766e32faaed60a0f890fcd3a57c63ddf06b204 100644
|
| --- a/url/url_canon_unittest.cc
|
| +++ b/url/url_canon_unittest.cc
|
| @@ -4,10 +4,9 @@
|
|
|
| #include <errno.h>
|
|
|
| +#include "base/macros.h"
|
| #include "testing/gtest/include/gtest/gtest.h"
|
| -#include "third_party/icu/source/common/unicode/ucnv.h"
|
| #include "url/url_canon.h"
|
| -#include "url/url_canon_icu.h"
|
| #include "url/url_canon_internal.h"
|
| #include "url/url_canon_stdstring.h"
|
| #include "url/url_parse.h"
|
| @@ -84,26 +83,6 @@ struct ReplaceCase {
|
| const char* expected;
|
| };
|
|
|
| -// Wrapper around a UConverter object that managers creation and destruction.
|
| -class UConvScoper {
|
| - public:
|
| - explicit UConvScoper(const char* charset_name) {
|
| - UErrorCode err = U_ZERO_ERROR;
|
| - converter_ = ucnv_open(charset_name, &err);
|
| - }
|
| -
|
| - ~UConvScoper() {
|
| - if (converter_)
|
| - ucnv_close(converter_);
|
| - }
|
| -
|
| - // Returns the converter object, may be NULL.
|
| - UConverter* converter() const { return converter_; }
|
| -
|
| - private:
|
| - UConverter* converter_;
|
| -};
|
| -
|
| // Magic string used in the replacements code that tells SetupReplComp to
|
| // call the clear function.
|
| const char kDeleteComp[] = "|";
|
| @@ -244,58 +223,6 @@ TEST(URLCanonTest, UTF) {
|
| }
|
| }
|
|
|
| -TEST(URLCanonTest, ICUCharsetConverter) {
|
| - struct ICUCase {
|
| - const wchar_t* input;
|
| - const char* encoding;
|
| - const char* expected;
|
| - } icu_cases[] = {
|
| - // UTF-8.
|
| - {L"Hello, world", "utf-8", "Hello, world"},
|
| - {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},
|
| - // Non-BMP UTF-8.
|
| - {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},
|
| - // Big5
|
| - {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},
|
| - // Unrepresentable character in the destination set.
|
| - {L"hello\x4f60\x06de\x597dworld", "big5", "hello\xa7\x41%26%231758%3B\xa6\x6eworld"},
|
| - };
|
| -
|
| - for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) {
|
| - UConvScoper conv(icu_cases[i].encoding);
|
| - ASSERT_TRUE(conv.converter() != NULL);
|
| - ICUCharsetConverter converter(conv.converter());
|
| -
|
| - std::string str;
|
| - StdStringCanonOutput output(&str);
|
| -
|
| - base::string16 input_str(WStringToUTF16(icu_cases[i].input));
|
| - int input_len = static_cast<int>(input_str.length());
|
| - converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);
|
| - output.Complete();
|
| -
|
| - EXPECT_STREQ(icu_cases[i].expected, str.c_str());
|
| - }
|
| -
|
| - // Test string sizes around the resize boundary for the output to make sure
|
| - // the converter resizes as needed.
|
| - const int static_size = 16;
|
| - UConvScoper conv("utf-8");
|
| - ASSERT_TRUE(conv.converter());
|
| - ICUCharsetConverter converter(conv.converter());
|
| - for (int i = static_size - 2; i <= static_size + 2; i++) {
|
| - // Make a string with the appropriate length.
|
| - base::string16 input;
|
| - for (int ch = 0; ch < i; ch++)
|
| - input.push_back('a');
|
| -
|
| - RawCanonOutput<static_size> output;
|
| - converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()),
|
| - &output);
|
| - EXPECT_EQ(input.length(), static_cast<size_t>(output.length()));
|
| - }
|
| -}
|
| -
|
| TEST(URLCanonTest, Scheme) {
|
| // Here, we're mostly testing that unusual characters are handled properly.
|
| // The canonicalizer doesn't do any parsing or whitespace detection. It will
|
| @@ -1198,57 +1125,38 @@ TEST(URLCanonTest, Query) {
|
| struct QueryCase {
|
| const char* input8;
|
| const wchar_t* input16;
|
| - const char* encoding;
|
| const char* expected;
|
| } query_cases[] = {
|
| - // Regular ASCII case in some different encodings.
|
| - {"foo=bar", L"foo=bar", NULL, "?foo=bar"},
|
| - {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"},
|
| - {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"},
|
| - {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"},
|
| + // Regular ASCII case.
|
| + {"foo=bar", L"foo=bar", "?foo=bar"},
|
| // Allow question marks in the query without escaping
|
| - {"as?df", L"as?df", NULL, "?as?df"},
|
| + {"as?df", L"as?df", "?as?df"},
|
| // Always escape '#' since it would mark the ref.
|
| - {"as#df", L"as#df", NULL, "?as%23df"},
|
| + {"as#df", L"as#df", "?as%23df"},
|
| // Escape some questionable 8-bit characters, but never unescape.
|
| - {"\x02hello\x7f bye", L"\x02hello\x7f bye", NULL, "?%02hello%7F%20bye"},
|
| - {"%40%41123", L"%40%41123", NULL, "?%40%41123"},
|
| + {"\x02hello\x7f bye", L"\x02hello\x7f bye", "?%02hello%7F%20bye"},
|
| + {"%40%41123", L"%40%41123", "?%40%41123"},
|
| // Chinese input/output
|
| - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", NULL, "?q=%E4%BD%A0%E5%A5%BD"},
|
| - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312", "?q=%C4%E3%BA%C3"},
|
| - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"},
|
| - // Unencodable character in the destination character set should be
|
| - // escaped. The escape sequence unescapes to be the entity name:
|
| - // "?q=&#20320;"
|
| - {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1", "?q=Chinese%26%2365319%3B"},
|
| + {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "?q=%E4%BD%A0%E5%A5%BD"},
|
| // Invalid UTF-8/16 input should be replaced with invalid characters.
|
| - {"q=\xed\xed", L"q=\xd800\xd800", NULL, "?q=%EF%BF%BD%EF%BF%BD"},
|
| + {"q=\xed\xed", L"q=\xd800\xd800", "?q=%EF%BF%BD%EF%BF%BD"},
|
| // Don't allow < or > because sometimes they are used for XSS if the
|
| // URL is echoed in content. Firefox does this, IE doesn't.
|
| - {"q=<asdf>", L"q=<asdf>", NULL, "?q=%3Casdf%3E"},
|
| + {"q=<asdf>", L"q=<asdf>", "?q=%3Casdf%3E"},
|
| // Escape double quotemarks in the query.
|
| - {"q=\"asdf\"", L"q=\"asdf\"", NULL, "?q=%22asdf%22"},
|
| + {"q=\"asdf\"", L"q=\"asdf\"", "?q=%22asdf%22"},
|
| };
|
|
|
| for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) {
|
| Component out_comp;
|
|
|
| - UConvScoper conv(query_cases[i].encoding);
|
| - ASSERT_TRUE(!query_cases[i].encoding || conv.converter());
|
| - ICUCharsetConverter converter(conv.converter());
|
| -
|
| - // Map NULL to a NULL converter pointer.
|
| - ICUCharsetConverter* conv_pointer = &converter;
|
| - if (!query_cases[i].encoding)
|
| - conv_pointer = NULL;
|
| -
|
| if (query_cases[i].input8) {
|
| int len = static_cast<int>(strlen(query_cases[i].input8));
|
| Component in_comp(0, len);
|
| std::string out_str;
|
|
|
| StdStringCanonOutput output(&out_str);
|
| - CanonicalizeQuery(query_cases[i].input8, in_comp, conv_pointer, &output,
|
| + CanonicalizeQuery(query_cases[i].input8, in_comp, NULL, &output,
|
| &out_comp);
|
| output.Complete();
|
|
|
| @@ -1262,8 +1170,7 @@ TEST(URLCanonTest, Query) {
|
| std::string out_str;
|
|
|
| StdStringCanonOutput output(&out_str);
|
| - CanonicalizeQuery(input16.c_str(), in_comp, conv_pointer, &output,
|
| - &out_comp);
|
| + CanonicalizeQuery(input16.c_str(), in_comp, NULL, &output, &out_comp);
|
| output.Complete();
|
|
|
| EXPECT_EQ(query_cases[i].expected, out_str);
|
|
|