| Index: url/url_canon_icu_unittest.cc
|
| diff --git a/url/url_canon_icu_unittest.cc b/url/url_canon_icu_unittest.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..83f6083c0b7cccc875eff63599acfe55dae39314
|
| --- /dev/null
|
| +++ b/url/url_canon_icu_unittest.cc
|
| @@ -0,0 +1,167 @@
|
| +// Copyright 2013 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "base/macros.h"
|
| +#include "testing/gtest/include/gtest/gtest.h"
|
| +#include "third_party/icu/source/common/unicode/ucnv.h"
|
| +#include "url/url_canon.h"
|
| +#include "url/url_canon_icu.h"
|
| +#include "url/url_canon_stdstring.h"
|
| +#include "url/url_test_utils.h"
|
| +
|
| +// Some implementations of base/basictypes.h may already define ARRAYSIZE. If
|
| +// not, alias it to ARRAYSIZE_UNSAFE from our version of basictypes.h.
|
| +// NOTE(review): only base/macros.h is included directly here; confirm source.
|
| +#ifndef ARRAYSIZE
|
| +#define ARRAYSIZE ARRAYSIZE_UNSAFE
|
| +#endif
|
| +
|
| +namespace url {
|
| +
|
| +using test_utils::WStringToUTF16;
|
| +
|
| +namespace {
|
| +
|
| +// Wrapper around a UConverter object that manages creation and destruction.
|
| +class UConvScoper {
|
| + public:
|
| + explicit UConvScoper(const char* charset_name) {
|
| + UErrorCode err = U_ZERO_ERROR;  // Never read; callers check converter().
|
| + converter_ = ucnv_open(charset_name, &err);
|
| + }
|
| +
|
| + ~UConvScoper() {
|
| + if (converter_)
|
| + ucnv_close(converter_);
|
| + }
|
| +
|
| + // Returns the converter obtained from ucnv_open(); may be NULL.
|
| + UConverter* converter() const { return converter_; }
|
| +
|
| + private:
|
| + UConverter* converter_;  // Closed in ~UConvScoper(); copying is not disabled.
|
| +};
|
| +
|
| +TEST(URLCanonIcuTest, ICUCharsetConverter) {
|
| + struct ICUCase {  // UTF-16 input, target charset name, expected output bytes.
|
| + const wchar_t* input;
|
| + const char* encoding;
|
| + const char* expected;
|
| + } icu_cases[] = {
|
| + // UTF-8: plain ASCII input and BMP (CJK) input.
|
| + {L"Hello, world", "utf-8", "Hello, world"},
|
| + {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},
|
| + // Non-BMP UTF-8: a surrogate pair is expected as one 4-byte sequence.
|
| + {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},
|
| + // Big5.
|
| + {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},
|
| + // U+06DE is unrepresentable in Big5 and is expected as "%26%231758%3B".
|
| + {L"hello\x4f60\x06de\x597dworld", "big5",
|
| + "hello\xa7\x41%26%231758%3B\xa6\x6eworld"},
|
| + };
|
| +
|
| + for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) {
|
| + UConvScoper conv(icu_cases[i].encoding);
|
| + ASSERT_TRUE(conv.converter() != NULL);
|
| + ICUCharsetConverter converter(conv.converter());
|
| +
|
| + std::string str;
|
| + StdStringCanonOutput output(&str);
|
| +
|
| + base::string16 input_str(WStringToUTF16(icu_cases[i].input));
|
| + int input_len = static_cast<int>(input_str.length());
|
| + converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);
|
| + output.Complete();
|
| +
|
| + EXPECT_STREQ(icu_cases[i].expected, str.c_str());
|
| + }
|
| +
|
| + // Test string sizes around the resize boundary for the output (static_size - 2
|
| + // through static_size + 2) to make sure the converter resizes as needed.
|
| + const int static_size = 16;
|
| + UConvScoper conv("utf-8");
|
| + ASSERT_TRUE(conv.converter());
|
| + ICUCharsetConverter converter(conv.converter());
|
| + for (int i = static_size - 2; i <= static_size + 2; i++) {
|
| + // Make a string of |i| 'a' characters.
|
| + base::string16 input;
|
| + for (int ch = 0; ch < i; ch++)
|
| + input.push_back('a');
|
| +
|
| + RawCanonOutput<static_size> output;
|
| + converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()),
|
| + &output);
|
| + EXPECT_EQ(input.length(), static_cast<size_t>(output.length()));
|
| + }
|
| +}
|
| +
|
| +TEST(URLCanonIcuTest, QueryWithConverter) {
|
| + struct QueryCase {  // 8-bit input, UTF-16 input, charset name, expected query.
|
| + const char* input8;
|
| + const wchar_t* input16;
|
| + const char* encoding;
|
| + const char* expected;
|
| + } query_cases[] = {
|
| + // Regular ASCII case in some different encodings.
|
| + {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"},
|
| + {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"},
|
| + {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"},
|
| + // Chinese input/output.
|
| + {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312",
|
| + "?q=%C4%E3%BA%C3"},
|
| + {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"},
|
| + // Unencodable character in the destination character set should be
|
| + // escaped. The escape sequence unescapes to a numeric character reference:
|
| + // "?q=Chinese&#65319;"
|
| + {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1",
|
| + "?q=Chinese%26%2365319%3B"},
|
| + };
|
| +
|
| + for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) {
|
| + Component out_comp;
|
| +
|
| + UConvScoper conv(query_cases[i].encoding);
|
| + ASSERT_TRUE(!query_cases[i].encoding || conv.converter());
|
| + ICUCharsetConverter converter(conv.converter());
|
| +
|
| + if (query_cases[i].input8) {
|
| + int len = static_cast<int>(strlen(query_cases[i].input8));
|
| + Component in_comp(0, len);
|
| + std::string out_str;
|
| +
|
| + StdStringCanonOutput output(&out_str);
|
| + CanonicalizeQuery(query_cases[i].input8, in_comp, &converter, &output,
|
| + &out_comp);
|
| + output.Complete();
|
| +
|
| + EXPECT_EQ(query_cases[i].expected, out_str);
|
| + }
|
| +
|
| + if (query_cases[i].input16) {
|
| + base::string16 input16(WStringToUTF16(query_cases[i].input16));
|
| + int len = static_cast<int>(input16.length());
|
| + Component in_comp(0, len);
|
| + std::string out_str;
|
| +
|
| + StdStringCanonOutput output(&out_str);
|
| + CanonicalizeQuery(input16.c_str(), in_comp, &converter, &output,
|
| + &out_comp);
|
| + output.Complete();
|
| +
|
| + EXPECT_EQ(query_cases[i].expected, out_str);
|
| + }
|
| + }
|
| +
|
| + // Extra test for input with an embedded NUL byte and no converter (NULL).
|
| + std::string out_str;
|
| + StdStringCanonOutput output(&out_str);
|
| + Component out_comp;
|
| + CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp);
|
| + output.Complete();
|
| + EXPECT_EQ("?a%20%00z%01", out_str);
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +} // namespace url
|
|
|