OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "chrome/browser/sync/util/character_set_converters.h" |
| 6 |
| 7 #include <string> |
| 8 |
| 9 #include "base/basictypes.h" |
| 10 #include "testing/gtest/include/gtest/gtest.h" |
| 11 |
| 12 using browser_sync::ToPathString; |
| 13 using browser_sync::ToUTF8; |
| 14 using browser_sync::AppendPathStringToUTF8; |
| 15 using browser_sync::AppendUTF8ToPathString; |
| 16 using browser_sync::PathStringToUTF8; |
| 17 using browser_sync::UTF8ToPathString; |
| 18 using std::string; |
| 19 |
| 20 class CharacterSetConverterTest : public testing::Test { |
| 21 }; |
| 22 |
| 23 TEST_F(CharacterSetConverterTest, ASCIIConversionTest) { |
| 24 string ascii = "Test String"; |
| 25 PathString wide = PSTR("Test String"); |
| 26 ToPathString to_wide(ascii); |
| 27 ASSERT_TRUE(to_wide.good()); |
| 28 ToUTF8 to_utf8(wide); |
| 29 |
| 30 // Using == as gunit doesn't handle PathString equality tests correctly (it |
| 31 // tries to print the PathString and fails). |
| 32 ASSERT_TRUE(PathString(wide) == to_wide.get_string16()); |
| 33 ASSERT_EQ(string(ascii), to_utf8.get_string()); |
| 34 ToPathString to_16(ascii); |
| 35 ASSERT_TRUE(to_16.good()); |
| 36 ASSERT_TRUE(PathString(wide) == to_16.get_string16()); |
| 37 #ifdef OS_WINDOWS |
| 38 // On Linux, PathString is already UTF8 |
| 39 ASSERT_EQ(string(ascii), static_cast<string>(ToUTF8(wide))); |
| 40 #endif |
| 41 // The next line fails the good_checked_ test. It would be a good death test |
| 42 // but they don't work on Windows. |
| 43 // ASSERT_TRUE(wide == ToPathString(utf8).get_string16()); |
| 44 } |
| 45 |
| 46 #ifdef OS_WINDOWS |
| 47 // On Linux, PathString is already UTF8 |
| 48 TEST_F(CharacterSetConverterTest, UnicodeConversionText) { |
| 49 // Source data obtained by running od -b on files saved in utf-8 and unicode |
| 50 // from a text editor. |
| 51 const char* utf8 = "\357\273\277\150\145\154\154\157\040\303\250\303\251" |
| 52 "\302\251\342\202\254\302\243\302\245\302\256\342\204\242"; |
| 53 // #ifdef IS_LITTLE_ENDIAN |
| 54 const PathChar* wide = reinterpret_cast<const PathChar*>("\377\376\150\000" |
| 55 "\145\000\154\000\154\000\157\000\040\000\350\000\351\000\251\000\254\040" |
| 56 "\243\000\245\000\256\000\042\041"); |
| 57 // #else |
| 58 // // This should work, but on Windows we don't have the endian |
| 59 // // macros. Since we only do conversion between 16<->8 on Windows, |
| 60 // // it's safe to assume little endian. |
| 61 // const PathChar* wide = |
| 62 // reinterpret_cast<PathChar*>("\376\377\000\150\000\145\000" |
| 63 // "\154\000\154\000\157\000\040\000\350\000\351\000\251\040\254\000\243" |
| 64 // "\000\245\000\256\041\042"); |
| 65 // #endif |
| 66 |
| 67 ToPathString to_wide(utf8); |
| 68 ASSERT_TRUE(to_wide.good()); |
| 69 ToUTF8 to_utf8(wide); |
| 70 |
| 71 // Using == as gunit doesn't handle PathString equality tests correctly (it |
| 72 // tries to print the PathString and fails). |
| 73 ASSERT_TRUE(wide == to_wide.get_string16()); |
| 74 ASSERT_EQ(string(utf8), to_utf8.get_string()); |
| 75 ToPathString to_16(utf8); |
| 76 ASSERT_TRUE(to_16.good()); |
| 77 ASSERT_TRUE(wide == to_16.get_string16()); |
| 78 ASSERT_EQ(string(utf8), reinterpret_cast<const string&>(ToUTF8(wide))); |
| 79 } |
| 80 #endif |
| 81 |
| 82 TEST_F(CharacterSetConverterTest, AppendUTF8Tests) { |
| 83 PathString one = PSTR("one"); |
| 84 PathString two = PSTR("two"); |
| 85 PathString three = PSTR("three"); |
| 86 string out; |
| 87 AppendPathStringToUTF8(one.data(), one.length(), &out); |
| 88 AppendPathStringToUTF8(two.data(), two.length(), &out); |
| 89 AppendPathStringToUTF8(three.data(), three.length(), &out); |
| 90 ASSERT_EQ(out, "onetwothree"); |
| 91 PathString onetwothree = PSTR("onetwothree"); |
| 92 PathStringToUTF8(onetwothree.data(), onetwothree.length(), &out); |
| 93 ASSERT_EQ(out, "onetwothree"); |
| 94 } |
| 95 |
| 96 TEST_F(CharacterSetConverterTest, AppendPathStringTests) { |
| 97 string one = "one"; |
| 98 string two = "two"; |
| 99 string three = "three"; |
| 100 PathString out; |
| 101 AppendUTF8ToPathString(one.data(), one.length(), &out); |
| 102 AppendUTF8ToPathString(two.data(), two.length(), &out); |
| 103 AppendUTF8ToPathString(three.data(), three.length(), &out); |
| 104 ASSERT_TRUE(out == PathString(PSTR("onetwothree"))); |
| 105 string onetwothree = "onetwothree"; |
| 106 UTF8ToPathString(onetwothree.data(), onetwothree.length(), &out); |
| 107 ASSERT_TRUE(out == PathString(PSTR("onetwothree"))); |
| 108 } |
| 109 |
| 110 #ifdef OS_WINDOWS |
| 111 namespace { |
| 112 // See http://en.wikipedia.org/wiki/UTF-16 for an explanation of UTF16. |
| 113 // For a test case we use the UTF-8 and UTF-16 encoding of char 119070 |
| 114 // (hex 1D11E), which is musical G clef. |
| 115 const unsigned char utf8_test_string[] = { |
| 116 0xEF, 0xBB, 0xBF, // BOM |
| 117 0xE6, 0xB0, 0xB4, // water, Chinese (0x6C34) |
| 118 0x7A, // lower case z |
| 119 0xF0, 0x9D, 0x84, 0x9E, // musical G clef (0x1D11E) |
| 120 0x00, |
| 121 }; |
| 122 const PathChar utf16_test_string[] = { |
| 123 0xFEFF, // BOM |
| 124 0x6C34, // water, Chinese |
| 125 0x007A, // lower case z |
| 126 0xD834, 0xDD1E, // musical G clef (0x1D11E) |
| 127 0x0000, |
| 128 }; |
| 129 } |
| 130 |
| 131 TEST_F(CharacterSetConverterTest, UTF16ToUTF8Test) { |
| 132 // Avoid truncation warning. |
| 133 const char* utf8_test_string_pointer = |
| 134 reinterpret_cast<const char*>(utf8_test_string); |
| 135 ASSERT_STREQ(utf8_test_string_pointer, ToUTF8(utf16_test_string)); |
| 136 } |
| 137 |
| 138 TEST_F(CharacterSetConverterTest, utf8_test_stringToUTF16Test) { |
| 139 // Avoid truncation warning. |
| 140 const char* utf8_test_string_pointer = |
| 141 reinterpret_cast<const char*>(utf8_test_string); |
| 142 ToPathString converted_utf8(utf8_test_string_pointer); |
| 143 ASSERT_TRUE(converted_utf8.good()); |
| 144 ASSERT_EQ(wcscmp(utf16_test_string, converted_utf8), 0); |
| 145 } |
| 146 |
| 147 TEST(NameTruncation, WindowsNameTruncation) { |
| 148 using browser_sync::TrimPathStringToValidCharacter; |
| 149 PathChar array[] = {'1', '2', 0xD950, 0xDF21, '3', '4', 0}; |
| 150 PathString message = array; |
| 151 ASSERT_EQ(message.length(), arraysize(array) - 1); |
| 152 int old_length = message.length(); |
| 153 while (old_length != 0) { |
| 154 TrimPathStringToValidCharacter(&message); |
| 155 if (old_length == 4) |
| 156 EXPECT_EQ(3, message.length()); |
| 157 else |
| 158 EXPECT_EQ(old_length, message.length()); |
| 159 message.resize(message.length() - 1); |
| 160 old_length = message.length(); |
| 161 } |
| 162 TrimPathStringToValidCharacter(&message); |
| 163 } |
| 164 #else |
| 165 |
| 166 // TODO(zork): Add unittests here once we're running these tests on linux. |
| 167 |
| 168 #endif |
OLD | NEW |