url/url_canon_icu_unittest.cc - Issue 257673002: Make it possible to build url/ without ICU on android.

Unified Diff: url/url_canon_icu_unittest.cc

Issue 257673002: Make it possible to build url/ without ICU on android. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Missed a spot Created 6 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: url/url_canon_icu_unittest.cc

diff --git a/url/url_canon_icu_unittest.cc b/url/url_canon_icu_unittest.cc

new file mode 100644

index 0000000000000000000000000000000000000000..83f6083c0b7cccc875eff63599acfe55dae39314

--- /dev/null

+++ b/url/url_canon_icu_unittest.cc

@@ -0,0 +1,167 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "base/macros.h"

+#include "testing/gtest/include/gtest/gtest.h"

+#include "third_party/icu/source/common/unicode/ucnv.h"

+#include "url/url_canon.h"

+#include "url/url_canon_icu.h"

+#include "url/url_canon_stdstring.h"

+#include "url/url_test_utils.h"

+// Some implementations of base/basictypes.h may define ARRAYSIZE.

+// If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro

+// which is in our version of basictypes.h.

+#ifndef ARRAYSIZE

+#define ARRAYSIZE ARRAYSIZE_UNSAFE

+#endif

+namespace url {

+using test_utils::WStringToUTF16;

+namespace {

+// Wrapper around a UConverter object that managers creation and destruction.

+class UConvScoper {

+ public:

+ explicit UConvScoper(const char* charset_name) {

+ UErrorCode err = U_ZERO_ERROR;

+ converter_ = ucnv_open(charset_name, &err);

+ }

+ ~UConvScoper() {

+ if (converter_)

+ ucnv_close(converter_);

+ }

+ // Returns the converter object, may be NULL.

+ UConverter* converter() const { return converter_; }

+ private:

+ UConverter* converter_;

+};

+TEST(URLCanonIcuTest, ICUCharsetConverter) {

+ struct ICUCase {

+ const wchar_t* input;

+ const char* encoding;

+ const char* expected;

+ } icu_cases[] = {

+ // UTF-8.

+ {L"Hello, world", "utf-8", "Hello, world"},

+ {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},

+ // Non-BMP UTF-8.

+ {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},

+ // Big5

+ {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},

+ // Unrepresentable character in the destination set.

+ {L"hello\x4f60\x06de\x597dworld", "big5",

+ "hello\xa7\x41%26%231758%3B\xa6\x6eworld"},

+ };

+ for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) {

+ UConvScoper conv(icu_cases[i].encoding);

+ ASSERT_TRUE(conv.converter() != NULL);

+ ICUCharsetConverter converter(conv.converter());

+ std::string str;

+ StdStringCanonOutput output(&str);

+ base::string16 input_str(WStringToUTF16(icu_cases[i].input));

+ int input_len = static_cast<int>(input_str.length());

+ converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);

+ output.Complete();

+ EXPECT_STREQ(icu_cases[i].expected, str.c_str());

+ }

+ // Test string sizes around the resize boundary for the output to make sure

+ // the converter resizes as needed.

+ const int static_size = 16;

+ UConvScoper conv("utf-8");

+ ASSERT_TRUE(conv.converter());

+ ICUCharsetConverter converter(conv.converter());

+ for (int i = static_size - 2; i <= static_size + 2; i++) {

+ // Make a string with the appropriate length.

+ base::string16 input;

+ for (int ch = 0; ch < i; ch++)

+ input.push_back('a');

+ RawCanonOutput<static_size> output;

+ converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()),

+ &output);

+ EXPECT_EQ(input.length(), static_cast<size_t>(output.length()));

+ }

+TEST(URLCanonIcuTest, QueryWithConverter) {

+ struct QueryCase {

+ const char* input8;

+ const wchar_t* input16;

+ const char* encoding;

+ const char* expected;

+ } query_cases[] = {

+ // Regular ASCII case in some different encodings.

+ {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"},

+ {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"},

+ {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"},

+ // Chinese input/output

+ {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312",

+ "?q=%C4%E3%BA%C3"},

+ {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"},

+ // Unencodable character in the destination character set should be

+ // escaped. The escape sequence unescapes to be the entity name:

+ // "?q=你"

+ {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1",

+ "?q=Chinese%26%2365319%3B"},

+ };

+ for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) {

+ Component out_comp;

+ UConvScoper conv(query_cases[i].encoding);

+ ASSERT_TRUE(!query_cases[i].encoding || conv.converter());

+ ICUCharsetConverter converter(conv.converter());

+ if (query_cases[i].input8) {

+ int len = static_cast<int>(strlen(query_cases[i].input8));

+ Component in_comp(0, len);

+ std::string out_str;

+ StdStringCanonOutput output(&out_str);

+ CanonicalizeQuery(query_cases[i].input8, in_comp, &converter, &output,

+ &out_comp);

+ output.Complete();

+ EXPECT_EQ(query_cases[i].expected, out_str);

+ }

+ if (query_cases[i].input16) {

+ base::string16 input16(WStringToUTF16(query_cases[i].input16));

+ int len = static_cast<int>(input16.length());

+ Component in_comp(0, len);

+ std::string out_str;

+ StdStringCanonOutput output(&out_str);

+ CanonicalizeQuery(input16.c_str(), in_comp, &converter, &output,

+ &out_comp);

+ output.Complete();

+ EXPECT_EQ(query_cases[i].expected, out_str);

+ }

+ // Extra test for input with embedded NULL;

+ std::string out_str;

+ StdStringCanonOutput output(&out_str);

+ Component out_comp;

+ CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp);

+ output.Complete();

+ EXPECT_EQ("?a%20%00z%01", out_str);

+} // namespace

+} // namespace url

« no previous file with comments | « url/url_canon_icu_alternatives_android.cc ('k') | url/url_canon_unittest.cc » ('j') | no next file with comments »