| OLD | NEW |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/basictypes.h" | 5 #include "base/basictypes.h" |
| 6 #include "base/string_util.h" | 6 #include "base/string_util.h" |
| 7 #include "testing/gtest/include/gtest/gtest.h" | 7 #include "testing/gtest/include/gtest/gtest.h" |
| 8 | 8 |
| 9 namespace base { | 9 namespace base { |
| 10 | 10 |
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 84 } | 84 } |
| 85 | 85 |
| 86 TEST(UTFStringConversionsTest, ConvertUTF8ToWide) { | 86 TEST(UTFStringConversionsTest, ConvertUTF8ToWide) { |
| 87 struct UTF8ToWideCase { | 87 struct UTF8ToWideCase { |
| 88 const char* utf8; | 88 const char* utf8; |
| 89 const wchar_t* wide; | 89 const wchar_t* wide; |
| 90 bool success; | 90 bool success; |
| 91 } convert_cases[] = { | 91 } convert_cases[] = { |
| 92 // Regular UTF-8 input. | 92 // Regular UTF-8 input. |
| 93 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, | 93 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, |
| 94 // Non-character is rejected. | 94 // Non-character is passed through. |
| 95 {"\xef\xbf\xbfHello", L"\xfffdHello", false}, | 95 {"\xef\xbf\xbfHello", L"\xffffHello", true}, |
| 96 // Truncated UTF-8 sequence. | 96 // Truncated UTF-8 sequence. |
| 97 {"\xe4\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, | 97 {"\xe4\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, |
| 98 // Truncated off the end. | 98 // Truncated off the end. |
| 99 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d\xfffd", false}, | 99 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d\xfffd", false}, |
| 100 // Non-shortest-form UTF-8. | 100 // Non-shortest-form UTF-8. |
| 101 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, | 101 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, |
| 102 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. | 102 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. |
| 103 {"\xed\xb0\x80", L"\xfffd", false}, | 103 {"\xed\xb0\x80", L"\xfffd", false}, |
| 104 // Non-BMP characters. The second is a non-character regarded as valid. | 104 // Non-BMP characters. The second is a non-character regarded as valid. |
| 105 // The result will either be in UTF-16 or UTF-32. | 105 // The result will either be in UTF-16 or UTF-32. |
| 106 #if defined(WCHAR_T_IS_UTF16) | 106 #if defined(WCHAR_T_IS_UTF16) |
| 107 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true}, | 107 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true}, |
| 108 {"A\xF4\x8F\xBF\xBEz", L"A\xfffdz", false}, | 108 {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true}, |
| 109 #elif defined(WCHAR_T_IS_UTF32) | 109 #elif defined(WCHAR_T_IS_UTF32) |
| 110 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true}, | 110 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true}, |
| 111 {"A\xF4\x8F\xBF\xBEz", L"A\xfffdz", false}, | 111 {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true}, |
| 112 #endif | 112 #endif |
| 113 }; | 113 }; |
| 114 | 114 |
| 115 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { | 115 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { |
| 116 std::wstring converted; | 116 std::wstring converted; |
| 117 EXPECT_EQ(convert_cases[i].success, | 117 EXPECT_EQ(convert_cases[i].success, |
| 118 UTF8ToWide(convert_cases[i].utf8, | 118 UTF8ToWide(convert_cases[i].utf8, |
| 119 strlen(convert_cases[i].utf8), | 119 strlen(convert_cases[i].utf8), |
| 120 &converted)); | 120 &converted)); |
| 121 std::wstring expected(convert_cases[i].wide); | 121 std::wstring expected(convert_cases[i].wide); |
| (...skipping 19 matching lines...) Expand all Loading... |
| 141 TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) { | 141 TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) { |
| 142 struct WideToUTF8Case { | 142 struct WideToUTF8Case { |
| 143 const wchar_t* utf16; | 143 const wchar_t* utf16; |
| 144 const char* utf8; | 144 const char* utf8; |
| 145 bool success; | 145 bool success; |
| 146 } convert_cases[] = { | 146 } convert_cases[] = { |
| 147 // Regular UTF-16 input. | 147 // Regular UTF-16 input. |
| 148 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, | 148 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, |
| 149 // Test a non-BMP character. | 149 // A surrogate pair (non-BMP character) becomes a 4-byte UTF-8 sequence. |
| 150 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, | 150 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, |
| 151 // Non-characters are rejected. | 151 // Non-characters are passed through. |
| 152 {L"\xffffHello", "\xef\xbf\xbdHello", false}, | 152 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, |
| 153 {L"\xdbff\xdffeHello", "\xef\xbf\xbdHello", false}, | 153 {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true}, |
| 154 // The first character is a truncated UTF-16 character. | 154 // An unpaired high surrogate at the start is replaced with U+FFFD. |
| 155 {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false}, | 155 {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false}, |
| 156 // Truncated at the end. | 156 // An unpaired high surrogate at the end is replaced with U+FFFD. |
| 157 {L"\x597d\xd800", "\xe5\xa5\xbd\xef\xbf\xbd", false}, | 157 {L"\x597d\xd800", "\xe5\xa5\xbd\xef\xbf\xbd", false}, |
| 158 }; | 158 }; |
| 159 | 159 |
| 160 for (int i = 0; i < arraysize(convert_cases); i++) { | 160 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { |
| 161 std::string converted; | 161 std::string converted; |
| 162 EXPECT_EQ(convert_cases[i].success, | 162 EXPECT_EQ(convert_cases[i].success, |
| 163 WideToUTF8(convert_cases[i].utf16, | 163 WideToUTF8(convert_cases[i].utf16, |
| 164 wcslen(convert_cases[i].utf16), | 164 wcslen(convert_cases[i].utf16), |
| 165 &converted)); | 165 &converted)); |
| 166 std::string expected(convert_cases[i].utf8); | 166 std::string expected(convert_cases[i].utf8); |
| 167 EXPECT_EQ(expected, converted); | 167 EXPECT_EQ(expected, converted); |
| 168 } | 168 } |
| 169 } | 169 } |
| 170 | 170 |
| 171 #elif defined(WCHAR_T_IS_UTF32) | 171 #elif defined(WCHAR_T_IS_UTF32) |
| 172 // This test is only valid when wchar_t == UTF-32. | 172 // This test is only valid when wchar_t == UTF-32. |
| 173 TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) { | 173 TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) { |
| 174 struct WideToUTF8Case { | 174 struct WideToUTF8Case { |
| 175 const wchar_t* utf32; | 175 const wchar_t* utf32; |
| 176 const char* utf8; | 176 const char* utf8; |
| 177 bool success; | 177 bool success; |
| 178 } convert_cases[] = { | 178 } convert_cases[] = { |
| 179 // Regular 16-bit input. | 179 // Regular 16-bit input. |
| 180 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, | 180 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, |
| 181 // Test a non-BMP character. | 181 // Test a non-BMP character. |
| 182 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, | 182 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, |
| 183 // Non-characters are rejected. | 183 // Non-characters are passed through. |
| 184 {L"\xffffHello", "\xEF\xBF\xBDHello", false}, | 184 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, |
| 185 {L"\x10fffeHello", "\xEF\xBF\xBDHello", false}, | 185 {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true}, |
| 186 // Invalid Unicode code points. | 186 // Invalid Unicode code points. |
| 187 {L"\xfffffffHello", "\xEF\xBF\xBDHello", false}, | 187 {L"\xfffffffHello", "\xEF\xBF\xBDHello", false}, |
| 188 // The first character is a truncated UTF-16 character. | 188 // Lone UTF-16 surrogate code points are not valid UTF-32 input. |
| 189 {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false}, | 189 {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false}, |
| 190 {L"\xdc01Hello", "\xef\xbf\xbdHello", false}, | 190 {L"\xdc01Hello", "\xef\xbf\xbdHello", false}, |
| 191 }; | 191 }; |
| 192 | 192 |
| 193 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { | 193 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { |
| 194 std::string converted; | 194 std::string converted; |
| 195 EXPECT_EQ(convert_cases[i].success, | 195 EXPECT_EQ(convert_cases[i].success, |
| (...skipping 24 matching lines...) Expand all Loading... |
| 220 EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length()); | 220 EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length()); |
| 221 std::string expected; | 221 std::string expected; |
| 222 memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi)); | 222 memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi)); |
| 223 EXPECT_EQ(arraysize(multi) - 1, expected.length()); | 223 EXPECT_EQ(arraysize(multi) - 1, expected.length()); |
| 224 const std::string& converted = WideToUTF8(wmultistring); | 224 const std::string& converted = WideToUTF8(wmultistring); |
| 225 EXPECT_EQ(arraysize(multi) - 1, converted.length()); | 225 EXPECT_EQ(arraysize(multi) - 1, converted.length()); |
| 226 EXPECT_EQ(expected, converted); | 226 EXPECT_EQ(expected, converted); |
| 227 } | 227 } |
| 228 | 228 |
| 229 } // namaspace base | 229 } // namespace base |
| OLD | NEW |