base/strings/sys_string_conversions_unittest.cc - Issue 1647803004: Move base to DEPS

Side by Side Diff: base/strings/sys_string_conversions_unittest.cc

Issue 1647803004: Move base to DEPS (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include <string>

6

7 #include "base/basictypes.h"

8 #include "base/strings/string_piece.h"

9 #include "base/strings/sys_string_conversions.h"

10 #include "base/strings/utf_string_conversions.h"

11 #include "base/test/scoped_locale.h"

12 #include "testing/gtest/include/gtest/gtest.h"

13

// Wide-string spelling of one code point above 0xFFFF, used by the ">16 bits"
// assertions in the tests below. When WCHAR_T_IS_UTF32 is defined it is the
// single unit \x10300; otherwise it is the two-unit sequence \xd800\xdf00.
// The tests pair it with the UTF-8 bytes F0 90 8C 80.
14 #ifdef WCHAR_T_IS_UTF32

15 static const std::wstring kSysWideOldItalicLetterA = L"\x10300";

16 #else

17 static const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00";

18 #endif

19

20 namespace base {

21

// Checks that SysWideToUTF8 yields the expected UTF-8 bytes for an ASCII
// string, a two-character non-ASCII string, a code point above 16 bits, and a
// string containing an embedded NUL.
22 TEST(SysStrings, SysWideToUTF8) {

23 EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world"));

24 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d"));

25

26 // >16 bits

27 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA));

28

29 // Error case. When Windows finds a UTF-16 character going off the end of

30 // a string, it just converts that literal value to UTF-8, even though this

31 // is invalid.

32 //

33 // This is what XP does, but Vista has different behavior, so we don't bother

34 // verifying it:

35 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw",

36 // SysWideToUTF8(L"\x4f60\xd800zyxw"));

37

38 // Test embedded NULLs.

// The NUL is appended with push_back because constructing from a literal
// would stop at the first NUL; input and expectation are both "a", NUL, "b".
39 std::wstring wide_null(L"a");

40 wide_null.push_back(0);

41 wide_null.push_back('b');

42

43 std::string expected_null("a");

44 expected_null.push_back(0);

45 expected_null.push_back('b');

46

47 EXPECT_EQ(expected_null, SysWideToUTF8(wide_null));

48 }

49

// Mirror of the SysWideToUTF8 test: checks that SysUTF8ToWide yields the
// expected wide string for ASCII input, a two-character non-ASCII input, a
// four-byte UTF-8 sequence, and input containing an embedded NUL.
50 TEST(SysStrings, SysUTF8ToWide) {

51 EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world"));

52 EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));

53 // >16 bits

54 EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80"));

55

56 // Error case. When Windows finds an invalid UTF-8 character, it just skips

57 // it. This seems weird because it's inconsistent with the reverse conversion.

58 //

59 // This is what XP does, but Vista has different behavior, so we don't bother

60 // verifying it:

61 // EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw"));

62

63 // Test embedded NULLs.

// The NUL is appended with push_back because constructing from a literal
// would stop at the first NUL; input and expectation are both "a", NUL, "b".
64 std::string utf8_null("a");

65 utf8_null.push_back(0);

66 utf8_null.push_back('b');

67

68 std::wstring expected_null(L"a");

69 expected_null.push_back(0);

70 expected_null.push_back('b');

71

72 EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null));

73 }

74

75 #if defined(OS_LINUX) // Tests depend on setting a specific Linux locale.

76

// Checks SysWideToNativeMB against the same inputs and expected UTF-8 bytes
// used for SysWideToUTF8: ASCII, a two-character non-ASCII string, a code
// point above 16 bits, and a string containing an embedded NUL.
77 TEST(SysStrings, SysWideToNativeMB) {

// Unless SYSTEM_NATIVE_UTF8 is defined, hold a ScopedLocale for
// "en_US.utf-8" for the rest of the test body (presumably this switches the
// locale the conversion uses; see base/test/scoped_locale.h to confirm).
78 #if !defined(SYSTEM_NATIVE_UTF8)

79 ScopedLocale locale("en_US.utf-8");

80 #endif

81 EXPECT_EQ("Hello, world", SysWideToNativeMB(L"Hello, world"));

82 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToNativeMB(L"\x4f60\x597d"));

83

84 // >16 bits

85 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToNativeMB(kSysWideOldItalicLetterA));

86

// NOTE(review): this test is compiled only under OS_LINUX, yet the note below
// describes Windows behavior; it matches the wording in the SysWideToUTF8
// test and looks carried over from there. Confirm whether it applies here.
87 // Error case. When Windows finds a UTF-16 character going off the end of

88 // a string, it just converts that literal value to UTF-8, even though this

89 // is invalid.

90 //

91 // This is what XP does, but Vista has different behavior, so we don't bother

92 // verifying it:

93 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw",

94 // SysWideToNativeMB(L"\x4f60\xd800zyxw"));

95

96 // Test embedded NULLs.

// The NUL is appended with push_back because constructing from a literal
// would stop at the first NUL; input and expectation are both "a", NUL, "b".
97 std::wstring wide_null(L"a");

98 wide_null.push_back(0);

99 wide_null.push_back('b');

100

101 std::string expected_null("a");

102 expected_null.push_back(0);

103 expected_null.push_back('b');

104

105 EXPECT_EQ(expected_null, SysWideToNativeMB(wide_null));

106 }

107

// Checks SysNativeMBToWide against the same inputs and expected wide strings
// used for SysUTF8ToWide: ASCII, a two-character non-ASCII input, a four-byte
// UTF-8 sequence, and input containing an embedded NUL.
108 // We assume the test is running in a UTF8 locale.

109 TEST(SysStrings, SysNativeMBToWide) {

// Unless SYSTEM_NATIVE_UTF8 is defined, hold a ScopedLocale for
// "en_US.utf-8" for the rest of the test body (presumably this is what makes
// the UTF-8 assumption above hold; see base/test/scoped_locale.h to confirm).
110 #if !defined(SYSTEM_NATIVE_UTF8)

111 ScopedLocale locale("en_US.utf-8");

112 #endif

113 EXPECT_EQ(L"Hello, world", SysNativeMBToWide("Hello, world"));

114 EXPECT_EQ(L"\x4f60\x597d", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));

115 // >16 bits

116 EXPECT_EQ(kSysWideOldItalicLetterA, SysNativeMBToWide("\xF0\x90\x8C\x80"));

117

// NOTE(review): this test is compiled only under OS_LINUX, yet the note below
// describes Windows behavior; it matches the wording in the SysUTF8ToWide
// test and looks carried over from there. Confirm whether it applies here.
118 // Error case. When Windows finds an invalid UTF-8 character, it just skips

119 // it. This seems weird because it's inconsistent with the reverse conversion.

120 //

121 // This is what XP does, but Vista has different behavior, so we don't bother

122 // verifying it:

123 // EXPECT_EQ(L"\x4f60zyxw", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5zyxw"));

124

125 // Test embedded NULLs.

// The NUL is appended with push_back because constructing from a literal
// would stop at the first NUL; input and expectation are both "a", NUL, "b".
126 std::string utf8_null("a");

127 utf8_null.push_back(0);

128 utf8_null.push_back('b');

129

130 std::wstring expected_null(L"a");

131 expected_null.push_back(0);

132 expected_null.push_back('b');

133

134 EXPECT_EQ(expected_null, SysNativeMBToWide(utf8_null));

135 }

136

// Wide strings that the SysNativeMBAndWide test converts to multibyte and
// back, expecting to recover each one unchanged. Every entry is written with
// \x escapes; the comment above an entry shows the text it spells.
137 static const wchar_t* const kConvertRoundtripCases[] = {

138 L"Google Video",

139 // "网页图片资讯更多 »"

140 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",

141 // "Παγκόσμιος Ιστός"

142 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"

143 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",

144 // "Поиск страниц на русском"

145 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"

146 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"

147 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",

148 // "전체서비스"

149 L"\xc804\xccb4\xc11c\xbe44\xc2a4",

150

151 // Test characters that take more than 16 bits. This will depend on whether

152 // wchar_t is 16 or 32 bits.

// If neither WCHAR_T_IS_UTF16 nor WCHAR_T_IS_UTF32 is defined, no >16-bit
// entries are added to the array.
153 #if defined(WCHAR_T_IS_UTF16)

154 L"\xd800\xdf00",

155 // Five consecutive code points, U+11D40 - U+11D44, written as surrogate pairs.
// NOTE(review): the original comment here was garbled ("?????") and labelled
// these "Mathematical Alphanumeric Symbols ... A,B,C,D,E", but that Unicode
// block is U+1D400 - U+1D7FF. Confirm which code points were intended.

156 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",

157 #elif defined(WCHAR_T_IS_UTF32)

158 L"\x10300",

159 // Five consecutive code points, U+11D40 - U+11D44, one wchar_t each.
// NOTE(review): the original comment here was garbled ("?????") and labelled
// these "Mathematical Alphanumeric Symbols ... A,B,C,D,E", but that Unicode
// block is U+1D400 - U+1D7FF. Confirm which code points were intended.

160 L"\x11d40\x11d41\x11d42\x11d43\x11d44",

161 #endif

162 };

163

164

// Round-trips every entry of kConvertRoundtripCases through the native
// multibyte conversions in three combinations, expecting the original wide
// string back each time. The size is compared before the content in each loop.
165 TEST(SysStrings, SysNativeMBAndWide) {

// Unless SYSTEM_NATIVE_UTF8 is defined, hold a ScopedLocale for
// "en_US.utf-8" for the rest of the test body (presumably so the native
// multibyte encoding is UTF-8; see base/test/scoped_locale.h to confirm).
166 #if !defined(SYSTEM_NATIVE_UTF8)

167 ScopedLocale locale("en_US.utf-8");

168 #endif

// 1) Wide -> native multibyte -> wide, using only the Sys* functions.
169 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {

170 std::wstring wide = kConvertRoundtripCases[i];

171 std::wstring trip = SysNativeMBToWide(SysWideToNativeMB(wide));

172 EXPECT_EQ(wide.size(), trip.size());

173 EXPECT_EQ(wide, trip);

174 }

175

176 // We assume our test is running in UTF-8, so double check through ICU.

// 2) Encode with WideToUTF8, decode with SysNativeMBToWide.
177 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {

178 std::wstring wide = kConvertRoundtripCases[i];

179 std::wstring trip = SysNativeMBToWide(WideToUTF8(wide));

180 EXPECT_EQ(wide.size(), trip.size());

181 EXPECT_EQ(wide, trip);

182 }

183

// 3) Encode with SysWideToNativeMB, decode with UTF8ToWide.
184 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {

185 std::wstring wide = kConvertRoundtripCases[i];

186 std::wstring trip = UTF8ToWide(SysWideToNativeMB(wide));

187 EXPECT_EQ(wide.size(), trip.size());

188 EXPECT_EQ(wide, trip);

189 }

190 }

191 #endif // OS_LINUX

192

193 } // namespace base

OLD	NEW

« no previous file with comments | « base/strings/sys_string_conversions_posix.cc ('k') | base/strings/sys_string_conversions_win.cc » ('j') | no next file with comments »