OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/sync/util/character_set_converters.h" | 5 #include "chrome/browser/sync/util/character_set_converters.h" |
6 | 6 |
7 #include <string> | 7 #include <string> |
8 | 8 |
9 #include "base/basictypes.h" | |
10 #include "build/build_config.h" | |
11 #include "testing/gtest/include/gtest/gtest.h" | 9 #include "testing/gtest/include/gtest/gtest.h" |
12 | 10 |
13 using browser_sync::ToPathString; | |
14 using browser_sync::ToUTF8; | |
15 using browser_sync::AppendPathStringToUTF8; | |
16 using browser_sync::AppendUTF8ToPathString; | |
17 using browser_sync::PathStringToUTF8; | |
18 using browser_sync::UTF8ToPathString; | |
19 using std::string; | 11 using std::string; |
20 | 12 |
21 class CharacterSetConverterTest : public testing::Test { | 13 class CharacterSetConverterTest : public testing::Test { |
22 }; | 14 }; |
23 | 15 |
24 TEST_F(CharacterSetConverterTest, ASCIIConversionTest) { | |
25 string ascii = "Test String"; | |
26 PathString wide = PSTR("Test String"); | |
27 ToPathString to_wide(ascii); | |
28 ASSERT_TRUE(to_wide.good()); | |
29 ToUTF8 to_utf8(wide); | |
30 | |
31 // Using == as gunit doesn't handle PathString equality tests correctly (it | |
32 // tries to print the PathString and fails). | |
33 ASSERT_TRUE(PathString(wide) == to_wide.get_string16()); | |
34 ASSERT_EQ(string(ascii), to_utf8.get_string()); | |
35 ToPathString to_16(ascii); | |
36 ASSERT_TRUE(to_16.good()); | |
37 ASSERT_TRUE(PathString(wide) == to_16.get_string16()); | |
38 #if defined(OS_WIN) | |
39 // On Linux, PathString is already UTF8 | |
40 ASSERT_EQ(string(ascii), static_cast<string>(ToUTF8(wide))); | |
41 #endif | |
42 // The next line fails the good_checked_ test. It would be a good death test | |
43 // but they don't work on Windows. | |
44 // ASSERT_TRUE(wide == ToPathString(utf8).get_string16()); | |
45 } | |
46 | |
47 #if defined(OS_WIN) | |
48 // On Linux, PathString is already UTF8 | |
49 TEST_F(CharacterSetConverterTest, UnicodeConversionText) { | |
50 // Source data obtained by running od -b on files saved in utf-8 and unicode | |
51 // from a text editor. | |
52 const char* utf8 = "\357\273\277\150\145\154\154\157\040\303\250\303\251" | |
53 "\302\251\342\202\254\302\243\302\245\302\256\342\204\242"; | |
54 // #ifdef IS_LITTLE_ENDIAN | |
55 const PathChar* wide = reinterpret_cast<const PathChar*>("\377\376\150\000" | |
56 "\145\000\154\000\154\000\157\000\040\000\350\000\351\000\251\000\254\040" | |
57 "\243\000\245\000\256\000\042\041"); | |
58 // #else | |
59 // // This should work, but on Windows we don't have the endian | |
60 // // macros. Since we only do conversion between 16<->8 on Windows, | |
61 // // it's safe to assume little endian. | |
62 // const PathChar* wide = | |
63 // reinterpret_cast<PathChar*>("\376\377\000\150\000\145\000" | |
64 // "\154\000\154\000\157\000\040\000\350\000\351\000\251\040\254\000\243" | |
65 // "\000\245\000\256\041\042"); | |
66 // #endif | |
67 | |
68 ToPathString to_wide(utf8); | |
69 ASSERT_TRUE(to_wide.good()); | |
70 ToUTF8 to_utf8(wide); | |
71 | |
72 // Using == as gunit doesn't handle PathString equality tests correctly (it | |
73 // tries to print the PathString and fails). | |
74 ASSERT_TRUE(wide == to_wide.get_string16()); | |
75 ASSERT_EQ(string(utf8), to_utf8.get_string()); | |
76 ToPathString to_16(utf8); | |
77 ASSERT_TRUE(to_16.good()); | |
78 ASSERT_TRUE(wide == to_16.get_string16()); | |
79 ASSERT_EQ(string(utf8), reinterpret_cast<const string&>(ToUTF8(wide))); | |
80 } | |
81 #endif // defined(OS_WIN) | |
82 | |
83 TEST_F(CharacterSetConverterTest, AppendUTF8Tests) { | |
84 PathString one = PSTR("one"); | |
85 PathString two = PSTR("two"); | |
86 PathString three = PSTR("three"); | |
87 string out; | |
88 AppendPathStringToUTF8(one.data(), one.length(), &out); | |
89 AppendPathStringToUTF8(two.data(), two.length(), &out); | |
90 AppendPathStringToUTF8(three.data(), three.length(), &out); | |
91 ASSERT_EQ(out, "onetwothree"); | |
92 PathString onetwothree = PSTR("onetwothree"); | |
93 PathStringToUTF8(onetwothree.data(), onetwothree.length(), &out); | |
94 ASSERT_EQ(out, "onetwothree"); | |
95 } | |
96 | |
97 TEST_F(CharacterSetConverterTest, AppendPathStringTests) { | |
98 string one = "one"; | |
99 string two = "two"; | |
100 string three = "three"; | |
101 PathString out; | |
102 AppendUTF8ToPathString(one.data(), one.length(), &out); | |
103 AppendUTF8ToPathString(two.data(), two.length(), &out); | |
104 AppendUTF8ToPathString(three.data(), three.length(), &out); | |
105 ASSERT_TRUE(out == PathString(PSTR("onetwothree"))); | |
106 string onetwothree = "onetwothree"; | |
107 UTF8ToPathString(onetwothree.data(), onetwothree.length(), &out); | |
108 ASSERT_TRUE(out == PathString(PSTR("onetwothree"))); | |
109 } | |
110 | |
111 #if defined(OS_WIN) | |
112 namespace { | |
113 // See http://en.wikipedia.org/wiki/UTF-16 for an explanation of UTF16. | |
114 // For a test case we use the UTF-8 and UTF-16 encoding of char 119070 | |
115 // (hex 1D11E), which is musical G clef. | |
116 const unsigned char utf8_test_string[] = { | |
117 0xEF, 0xBB, 0xBF, // BOM | |
118 0xE6, 0xB0, 0xB4, // water, Chinese (0x6C34) | |
119 0x7A, // lower case z | |
120 0xF0, 0x9D, 0x84, 0x9E, // musical G clef (0x1D11E) | |
121 0x00, | |
122 }; | |
123 const PathChar utf16_test_string[] = { | |
124 0xFEFF, // BOM | |
125 0x6C34, // water, Chinese | |
126 0x007A, // lower case z | |
127 0xD834, 0xDD1E, // musical G clef (0x1D11E) | |
128 0x0000, | |
129 }; | |
130 } | |
131 | |
132 TEST_F(CharacterSetConverterTest, UTF16ToUTF8Test) { | |
133 // Avoid truncation warning. | |
134 const char* utf8_test_string_pointer = | |
135 reinterpret_cast<const char*>(utf8_test_string); | |
136 ASSERT_STREQ(utf8_test_string_pointer, ToUTF8(utf16_test_string)); | |
137 } | |
138 | |
139 TEST_F(CharacterSetConverterTest, utf8_test_stringToUTF16Test) { | |
140 // Avoid truncation warning. | |
141 const char* utf8_test_string_pointer = | |
142 reinterpret_cast<const char*>(utf8_test_string); | |
143 ToPathString converted_utf8(utf8_test_string_pointer); | |
144 ASSERT_TRUE(converted_utf8.good()); | |
145 ASSERT_EQ(wcscmp(utf16_test_string, converted_utf8), 0); | |
146 } | |
147 | |
148 TEST(NameTruncation, WindowsNameTruncation) { | 16 TEST(NameTruncation, WindowsNameTruncation) { |
149 using browser_sync::TrimPathStringToValidCharacter; | 17 using browser_sync::TrimPathStringToValidCharacter; |
150 PathChar array[] = {'1', '2', 0xD950, 0xDF21, '3', '4', 0}; | 18 PathChar array[] = {'1', '2', '\xc0', '\xe0', '3', '4', '\0'}; |
151 PathString message = array; | 19 PathString message = array; |
152 ASSERT_EQ(message.length(), arraysize(array) - 1); | 20 ASSERT_EQ(message.length(), arraysize(array) - 1); |
153 int old_length = message.length(); | 21 string::size_type old_length = message.length(); |
154 while (old_length != 0) { | 22 while (old_length != 0) { |
155 TrimPathStringToValidCharacter(&message); | 23 TrimPathStringToValidCharacter(&message); |
156 if (old_length == 4) | 24 if (old_length == 4) |
157 EXPECT_EQ(3, message.length()); | 25 EXPECT_EQ(3u, message.length()); |
158 else | 26 else |
159 EXPECT_EQ(old_length, message.length()); | 27 EXPECT_EQ(old_length, message.length()); |
160 message.resize(message.length() - 1); | 28 message.resize(message.length() - 1); |
161 old_length = message.length(); | 29 old_length = message.length(); |
162 } | 30 } |
163 TrimPathStringToValidCharacter(&message); | 31 TrimPathStringToValidCharacter(&message); |
164 } | 32 } |
165 #else | |
166 | |
167 // TODO(zork): Add unittests here once we're running these tests on linux. | |
168 | |
169 #endif | |
OLD | NEW |