base/utf_string_conversions_unittest.cc - Issue 522029: If we can't read a unicode character, write the standard "unknown" (0xFFFD) c...

Side by Side Diff: base/utf_string_conversions_unittest.cc

Issue 522029: If we can't read a unicode character, write the standard "unknown" (0xFFFD) c... (Closed) Base URL: svn://chrome-svn.corp.google.com/chrome/trunk/src/

Patch Set: '' Created 10 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/basictypes.h"	5 #include "base/basictypes.h"

6 #include "base/string_util.h"	6 #include "base/string_util.h"

7 #include "testing/gtest/include/gtest/gtest.h"	7 #include "testing/gtest/include/gtest/gtest.h"

8	8

9 namespace base {	9 namespace base {

10	10

(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
87 struct UTF8ToWideCase {	87 struct UTF8ToWideCase {

88 const char* utf8;	88 const char* utf8;

89 const wchar_t* wide;	89 const wchar_t* wide;

90 bool success;	90 bool success;

91 } convert_cases[] = {	91 } convert_cases[] = {

92 // Regular UTF-8 input.	92 // Regular UTF-8 input.

93 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},	93 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},

94 // Non-character is passed through.	94 // Non-character is passed through.

95 {"\xef\xbf\xbfHello", L"\xffffHello", true},	95 {"\xef\xbf\xbfHello", L"\xffffHello", true},

96 // Truncated UTF-8 sequence.	96 // Truncated UTF-8 sequence.

97 {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false},	97 {"\xe4\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false},

98 // Truncated off the end.	98 // Truncated off the end.

99 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false},	99 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d\xfffd", false},

100 // Non-shortest-form UTF-8.	100 // Non-shortest-form UTF-8.

101 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false},	101 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false},

102 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.	102 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.

103 {"\xed\xb0\x80", L"", false},	103 {"\xed\xb0\x80", L"\xfffd", false},

104 // Non-BMP characters. The second is a non-character regarded as valid.	104 // Non-BMP characters. The second is a non-character regarded as valid.

105 // The result will either be in UTF-16 or UTF-32.	105 // The result will either be in UTF-16 or UTF-32.

106 #if defined(WCHAR_T_IS_UTF16)	106 #if defined(WCHAR_T_IS_UTF16)

107 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},	107 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},

108 {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true},	108 {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true},

109 #elif defined(WCHAR_T_IS_UTF32)	109 #elif defined(WCHAR_T_IS_UTF32)

110 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true},	110 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true},

111 {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true},	111 {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true},

112 #endif	112 #endif

113 };	113 };

(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
145 bool success;	145 bool success;

146 } convert_cases[] = {	146 } convert_cases[] = {

147 // Regular UTF-16 input.	147 // Regular UTF-16 input.

148 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},	148 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},

149 // Test a non-BMP character.	149 // Test a non-BMP character.

150 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},	150 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},

151 // Non-characters are passed through.	151 // Non-characters are passed through.

152 {L"\xffffHello", "\xEF\xBF\xBFHello", true},	152 {L"\xffffHello", "\xEF\xBF\xBFHello", true},

153 {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},	153 {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},

154 // The first character is a truncated UTF-16 character.	154 // The first character is a truncated UTF-16 character.

155 {L"\xd800\x597d", "\xe5\xa5\xbd", false},	155 {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false},

156 // Truncated at the end.	156 // Truncated at the end.

157 {L"\x597d\xd800", "\xe5\xa5\xbd", false},	157 {L"\x597d\xd800", "\xe5\xa5\xbd\xef\xbf\xbd", false},

158 };	158 };

159	159

160 for (int i = 0; i < arraysize(convert_cases); i++) {	160 for (int i = 0; i < arraysize(convert_cases); i++) {

161 std::string converted;	161 std::string converted;

162 EXPECT_EQ(convert_cases[i].success,	162 EXPECT_EQ(convert_cases[i].success,

163 WideToUTF8(convert_cases[i].utf16,	163 WideToUTF8(convert_cases[i].utf16,

164 wcslen(convert_cases[i].utf16),	164 wcslen(convert_cases[i].utf16),

165 &converted));	165 &converted));

166 std::string expected(convert_cases[i].utf8);	166 std::string expected(convert_cases[i].utf8);

167 EXPECT_EQ(expected, converted);	167 EXPECT_EQ(expected, converted);

168 }	168 }

169 }	169 }

170	170

171 #elif defined(WCHAR_T_IS_UTF32)	171 #elif defined(WCHAR_T_IS_UTF32)

172 // This test is only valid when wchar_t == UTF-32.	172 // This test is only valid when wchar_t == UTF-32.

173 TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) {	173 TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) {

174 struct WideToUTF8Case {	174 struct WideToUTF8Case {

175 const wchar_t* utf32;	175 const wchar_t* utf32;

176 const char* utf8;	176 const char* utf8;

177 bool success;	177 bool success;

178 } convert_cases[] = {	178 } convert_cases[] = {

179 // Regular 16-bit input.	179 // Regular 16-bit input.

180 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},	180 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},

181 // Test a non-BMP character.	181 // Test a non-BMP character.

182 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true},	182 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true},

183 // Non-characters are passed through.	183 // Non-characters are passed through.

184 {L"\xffffHello", "\xEF\xBF\xBFHello", true},	184 {L"\xffffHello", "\xEF\xBF\xBFHello", true},

185 {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},	185 {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},

186 // Invalid Unicode code points.	186 // Invalid Unicode code points.

187 {L"\xfffffffHello", "Hello", false},	187 {L"\xfffffffHello", "\xEF\xBF\xBDHello", false},

188 // The first character is a truncated UTF-16 character.	188 // The first character is a truncated UTF-16 character.

189 {L"\xd800\x597d", "\xe5\xa5\xbd", false},	189 {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false},

190 {L"\xdc01Hello", "Hello", false},	190 {L"\xdc01Hello", "\xef\xbf\xbdHello", false},

191 };	191 };

192	192

193 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {	193 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {

194 std::string converted;	194 std::string converted;

195 EXPECT_EQ(convert_cases[i].success,	195 EXPECT_EQ(convert_cases[i].success,

196 WideToUTF8(convert_cases[i].utf32,	196 WideToUTF8(convert_cases[i].utf32,

197 wcslen(convert_cases[i].utf32),	197 wcslen(convert_cases[i].utf32),

198 &converted));	198 &converted));

199 std::string expected(convert_cases[i].utf8);	199 std::string expected(convert_cases[i].utf8);

200 EXPECT_EQ(expected, converted);	200 EXPECT_EQ(expected, converted);

(...skipping 19 matching lines...) Expand all Loading...
220 EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length());	220 EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length());

221 std::string expected;	221 std::string expected;

222 memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi));	222 memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi));

223 EXPECT_EQ(arraysize(multi) - 1, expected.length());	223 EXPECT_EQ(arraysize(multi) - 1, expected.length());

224 const std::string& converted = WideToUTF8(wmultistring);	224 const std::string& converted = WideToUTF8(wmultistring);

225 EXPECT_EQ(arraysize(multi) - 1, converted.length());	225 EXPECT_EQ(arraysize(multi) - 1, converted.length());

226 EXPECT_EQ(expected, converted);	226 EXPECT_EQ(expected, converted);

227 }	227 }

228	228

229 } // namespace base	229 } // namespace base

OLD	NEW

« no previous file with comments | « base/utf_string_conversions.cc ('k') | chrome/common/zip_unittest.cc » ('j') | no next file with comments »