base/sys_string_conversions_unittest.cc - Issue 12213061: Move sys_string_conversions to base/strings.

Side by Side Diff: base/sys_string_conversions_unittest.cc

Issue 12213061: Move sys_string_conversions to base/strings. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: rm him Created 7 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5

6 #include <string>

7

8 #include "base/basictypes.h"

9 #include "base/string_piece.h"

10 #include "base/test/scoped_locale.h"

11 #include "base/utf_string_conversions.h"

12 #include "base/sys_string_conversions.h"

13 #include "testing/gtest/include/gtest/gtest.h"

14

15 #ifdef WCHAR_T_IS_UTF32  // Wide form of U+10300 (per the constant's name, Old Italic Letter A); encoding depends on wchar_t width.

16 static const std::wstring kSysWideOldItalicLetterA = L"\x10300";  // One 32-bit unit holding U+10300.

17 #else

18 static const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00";  // UTF-16 surrogate pair for U+10300.

19 #endif

20

21 TEST(SysStrings, SysWideToUTF8) {  // Checks base::SysWideToUTF8 on ASCII, BMP, non-BMP and embedded-NUL input.

22 using base::SysWideToUTF8;

23 EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world"));

24 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d"));  // U+4F60 U+597D, three UTF-8 bytes each.

25

26 // >16 bits: U+10300 lies above U+FFFF, so the expected UTF-8 form is four bytes.

27 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA));

28

29 // Error case. When Windows finds a UTF-16 character going off the end of

30 // a string, it just converts that literal value to UTF-8, even though this

31 // is invalid.

32 //

33 // This is what XP does, but Vista has different behavior, so we don't bother

34 // verifying it:

35 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw",

36 // SysWideToUTF8(L"\x4f60\xd800zyxw"));

37

38 // Test embedded NULs: the expected output keeps the 0 element and the 'b' after it.

39 std::wstring wide_null(L"a");

40 wide_null.push_back(0);

41 wide_null.push_back('b');

42

43 std::string expected_null("a");

44 expected_null.push_back(0);

45 expected_null.push_back('b');

46

47 EXPECT_EQ(expected_null, SysWideToUTF8(wide_null));

48 }

49

50 TEST(SysStrings, SysUTF8ToWide) {  // Checks base::SysUTF8ToWide on ASCII, BMP, non-BMP and embedded-NUL input.

51 using base::SysUTF8ToWide;

52 EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world"));

53 EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));  // Two three-byte sequences -> U+4F60 U+597D.

54 // >16 bits: the four-byte sequence for U+10300 must yield the platform's wide form of it.

55 EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80"));

56

57 // Error case. When Windows finds an invalid UTF-8 character, it just skips

58 // it. This seems weird because it's inconsistent with the reverse conversion.

59 //

60 // This is what XP does, but Vista has different behavior, so we don't bother

61 // verifying it:

62 // EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw"));

63

64 // Test embedded NULs: the expected output keeps the 0 element and the 'b' after it.

65 std::string utf8_null("a");

66 utf8_null.push_back(0);

67 utf8_null.push_back('b');

68

69 std::wstring expected_null(L"a");

70 expected_null.push_back(0);

71 expected_null.push_back('b');

72

73 EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null));

74 }

75

76 #if defined(OS_LINUX) // Tests depend on setting a specific Linux locale.

77

78 TEST(SysStrings, SysWideToNativeMB) {  // Checks base::SysWideToNativeMB with the en_US.utf-8 locale requested below.

79 using base::SysWideToNativeMB;

80 base::ScopedLocale locale("en_US.utf-8");  // Presumably restores the previous locale on scope exit - confirm in base/test/scoped_locale.h.

81 EXPECT_EQ("Hello, world", SysWideToNativeMB(L"Hello, world"));

82 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToNativeMB(L"\x4f60\x597d"));  // U+4F60 U+597D, three UTF-8 bytes each.

83

84 // >16 bits: U+10300 lies above U+FFFF, so the expected UTF-8 form is four bytes.

85 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToNativeMB(kSysWideOldItalicLetterA));

86

87 // Error case. When Windows finds a UTF-16 character going off the end of

88 // a string, it just converts that literal value to UTF-8, even though this

89 // is invalid.

90 // NOTE(review): same wording as the SysWideToUTF8 test, yet this test is built only under OS_LINUX.

91 // This is what XP does, but Vista has different behavior, so we don't bother

92 // verifying it:

93 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw",

94 // SysWideToNativeMB(L"\x4f60\xd800zyxw"));

95

96 // Test embedded NULs: the expected output keeps the 0 element and the 'b' after it.

97 std::wstring wide_null(L"a");

98 wide_null.push_back(0);

99 wide_null.push_back('b');

100

101 std::string expected_null("a");

102 expected_null.push_back(0);

103 expected_null.push_back('b');

104

105 EXPECT_EQ(expected_null, SysWideToNativeMB(wide_null));

106 }

107

108 // Checks base::SysNativeMBToWide; the test requests en_US.utf-8 itself via base::ScopedLocale.

109 TEST(SysStrings, SysNativeMBToWide) {

110 using base::SysNativeMBToWide;

111 base::ScopedLocale locale("en_US.utf-8");  // Presumably restores the previous locale on scope exit - confirm in base/test/scoped_locale.h.

112 EXPECT_EQ(L"Hello, world", SysNativeMBToWide("Hello, world"));

113 EXPECT_EQ(L"\x4f60\x597d", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));  // Two three-byte sequences -> U+4F60 U+597D.

114 // >16 bits: the four-byte sequence for U+10300 must yield the platform's wide form of it.

115 EXPECT_EQ(kSysWideOldItalicLetterA, SysNativeMBToWide("\xF0\x90\x8C\x80"));

116

117 // Error case. When Windows finds an invalid UTF-8 character, it just skips

118 // it. This seems weird because it's inconsistent with the reverse conversion.

119 // NOTE(review): same wording as the SysUTF8ToWide test, yet this test is built only under OS_LINUX.

120 // This is what XP does, but Vista has different behavior, so we don't bother

121 // verifying it:

122 // EXPECT_EQ(L"\x4f60zyxw", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5zyxw"));

123

124 // Test embedded NULs: the expected output keeps the 0 element and the 'b' after it.

125 std::string utf8_null("a");

126 utf8_null.push_back(0);

127 utf8_null.push_back('b');

128

129 std::wstring expected_null(L"a");

130 expected_null.push_back(0);

131 expected_null.push_back('b');

132

133 EXPECT_EQ(expected_null, SysNativeMBToWide(utf8_null));

134 }

135

136 static const wchar_t* const kConvertRoundtripCases[] = {  // Wide inputs for the round trips in the SysNativeMBAndWide test.

137 L"Google Video",

138 // "网页 图片 资讯更多 »" (the literal contains U+0020 between the groups)

139 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",

140 // "Παγκόσμιος Ιστός"

141 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"

142 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",

143 // "Поиск страниц на русском"

144 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"

145 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"

146 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",

147 // "전체서비스"

148 L"\xc804\xccb4\xc11c\xbe44\xc2a4",

149

150 // Test characters that take more than 16 bits. This will depend on whether

151 // wchar_t is 16 or 32 bits.

152 #if defined(WCHAR_T_IS_UTF16)

153 L"\xd800\xdf00",

154 // U+11D40 - U+11D44 as surrogate pairs. NOTE(review): originally labelled "Mathematical Alphanumeric Symbols A-E", but that block starts at U+1D400 - confirm intent.

155 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",

156 #elif defined(WCHAR_T_IS_UTF32)

157 L"\x10300",

158 // U+11D40 - U+11D44 as 32-bit units. NOTE(review): originally labelled "Mathematical Alphanumeric Symbols A-E", but that block starts at U+1D400 - confirm intent.

159 L"\x11d40\x11d41\x11d42\x11d43\x11d44",

160 #endif

161 };

162

163

164 TEST(SysStrings, SysNativeMBAndWide) {  // Round-trips every kConvertRoundtripCases entry three ways and expects the input back.

165 base::ScopedLocale locale("en_US.utf-8");  // Presumably restores the previous locale on scope exit - confirm in base/test/scoped_locale.h.

166 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {  // wide -> native MB -> wide, Sys* functions both ways.

167 std::wstring wide = kConvertRoundtripCases[i];

168 std::wstring trip = base::SysNativeMBToWide(base::SysWideToNativeMB(wide));

169 EXPECT_EQ(wide.size(), trip.size());

170 EXPECT_EQ(wide, trip);

171 }

172

173 // The locale requested above is UTF-8, so cross-check against WideToUTF8 / UTF8ToWide (the original note calls this the ICU path).

174 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {  // Encode with WideToUTF8, decode with SysNativeMBToWide.

175 std::wstring wide = kConvertRoundtripCases[i];

176 std::wstring trip = base::SysNativeMBToWide(WideToUTF8(wide));

177 EXPECT_EQ(wide.size(), trip.size());

178 EXPECT_EQ(wide, trip);

179 }

180

181 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {  // Encode with SysWideToNativeMB, decode with UTF8ToWide.

182 std::wstring wide = kConvertRoundtripCases[i];

183 std::wstring trip = UTF8ToWide(base::SysWideToNativeMB(wide));

184 EXPECT_EQ(wide.size(), trip.size());

185 EXPECT_EQ(wide, trip);

186 }

187 }

188 #endif // OS_LINUX

OLD	NEW

« no previous file with comments | « base/sys_string_conversions_posix.cc ('k') | base/sys_string_conversions_win.cc » ('j') | no next file with comments »