base/i18n/icu_string_conversions_unittest.cc - Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur...

Side by Side Diff: base/i18n/icu_string_conversions_unittest.cc

Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <math.h>	5 #include <math.h>

6 #include <stdarg.h>	6 #include <stdarg.h>

7	7

8 #include <limits>	8 #include <limits>

9 #include <sstream>	9 #include <sstream>

10	10

11 #include "base/basictypes.h"	11 #include "base/basictypes.h"

	12 #include "base/i18n/icu_string_conversions.h"

12 #include "base/logging.h"	13 #include "base/logging.h"

13 #include "base/utf_string_conversions.h"	14 #include "base/utf_string_conversions.h"

14 #include "base/i18n/icu_string_conversions.h"

15 #include "testing/gtest/include/gtest/gtest.h"	15 #include "testing/gtest/include/gtest/gtest.h"

16	16

17 namespace base {	17 namespace base {

18	18

19 namespace {	19 namespace {

20	20

21 // Given a null-terminated string of wchar_t with each wchar_t representing	21 // Given a null-terminated string of wchar_t with each wchar_t representing

22 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.	22 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.

23 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)	23 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)

24 // should be represented as a surrogate pair (two UTF-16 units)	24 // should be represented as a surrogate pair (two UTF-16 units)

25 // even where wchar_t is 32-bit (Linux and Mac).	25 // even where wchar_t is 32-bit (Linux and Mac).

26 //	26 //

27 // This is to help write tests for functions with string16 params until	27 // This is to help write tests for functions with string16 params until

28 // the C++ 0x UTF-16 literal is well-supported by compilers.	28 // the C++ 0x UTF-16 literal is well-supported by compilers.

29 string16 BuildString16(const wchar_t* s) {	29 string16 BuildString16(const wchar_t* s) {

30 #if defined(WCHAR_T_IS_UTF16)	30 #if defined(WCHAR_T_IS_UTF16)

31 return string16(s);	31 return string16(s);

32 #elif defined(WCHAR_T_IS_UTF32)	32 #elif defined(WCHAR_T_IS_UTF32)

33 string16 u16;	33 string16 u16;

34 while (*s != 0) {	34 while (*s != 0) {

35 DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu);	35 DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu);

36 u16.push_back(*s++);	36 u16.push_back(*s++);

37 }	37 }

38 return u16;	38 return u16;

39 #endif	39 #endif

40 }	40 }

41	41

42 static const wchar_t* const kConvertRoundtripCases[] = {	42 const wchar_t* const kConvertRoundtripCases[] = {

43 L"Google Video",	43 L"Google Video",

44 // "网页图片资讯更多 »"	44 // "网页图片资讯更多 »"

45 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",	45 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",

46 // "Παγκόσμιος Ιστός"	46 // "Παγκόσμιος Ιστός"

47 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"	47 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"

48 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",	48 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",

49 // "Поиск страниц на русском"	49 // "Поиск страниц на русском"

50 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"	50 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"

51 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"	51 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"

52 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",	52 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",

53 // "전체서비스"	53 // "전체서비스"

54 L"\xc804\xccb4\xc11c\xbe44\xc2a4",	54 L"\xc804\xccb4\xc11c\xbe44\xc2a4",

55	55

56 // Test characters that take more than 16 bits. This will depend on whether	56 // Test characters that take more than 16 bits. This will depend on whether

57 // wchar_t is 16 or 32 bits.	57 // wchar_t is 16 or 32 bits.

58 #if defined(WCHAR_T_IS_UTF16)	58 #if defined(WCHAR_T_IS_UTF16)

59 L"\xd800\xdf00",	59 L"\xd800\xdf00",

60 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E)	60 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E)

61 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",	61 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",

62 #elif defined(WCHAR_T_IS_UTF32)	62 #elif defined(WCHAR_T_IS_UTF32)

63 L"\x10300",	63 L"\x10300",

64 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E)	64 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E)

65 L"\x11d40\x11d41\x11d42\x11d43\x11d44",	65 L"\x11d40\x11d41\x11d42\x11d43\x11d44",

66 #endif	66 #endif

67 };	67 };

68	68

69 } // namespace	69 } // namespace

70	70

71 TEST(StringUtilTest, ConvertCodepageUTF8) {	71 TEST(ICUStringConversionsTest, ConvertCodepageUTF8) {

72 // Make sure WideToCodepage works like WideToUTF8.	72 // Make sure WideToCodepage works like WideToUTF8.

73 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {	73 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {

74 std::string expected(WideToUTF8(kConvertRoundtripCases[i]));	74 std::string expected(WideToUTF8(kConvertRoundtripCases[i]));

75 std::string utf8;	75 std::string utf8;

76 EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8,	76 EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8,

77 OnStringConversionError::SKIP, &utf8));	77 OnStringConversionError::SKIP, &utf8));

78 EXPECT_EQ(expected, utf8);	78 EXPECT_EQ(expected, utf8);

79 }	79 }

80 }	80 }

81	81

(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
149 true,	149 true,

150 L"\x00A5\x00A8",	150 L"\x00A5\x00A8",

151 NULL},	151 NULL},

152 // Chinese (GB18030) : A 4 byte sequence mapped to plane 2 (U+20000)	152 // Chinese (GB18030) : A 4 byte sequence mapped to plane 2 (U+20000)

153 {"gb18030",	153 {"gb18030",

154 "\x95\x32\x82\x36\xD2\xBB",	154 "\x95\x32\x82\x36\xD2\xBB",

155 OnStringConversionError::FAIL,	155 OnStringConversionError::FAIL,

156 true,	156 true,

157 #if defined(WCHAR_T_IS_UTF16)	157 #if defined(WCHAR_T_IS_UTF16)

158 L"\xD840\xDC00\x4E00",	158 L"\xD840\xDC00\x4E00",

159 #else	159 #elif defined(WCHAR_T_IS_UTF32)

160 L"\x20000\x4E00",	160 L"\x20000\x4E00",

161 #endif	161 #endif

162 L"\xD840\xDC00\x4E00"},	162 L"\xD840\xDC00\x4E00"},

163 {"big5",	163 {"big5",

164 "\xA7\x41\xA6\x6E",	164 "\xA7\x41\xA6\x6E",

165 OnStringConversionError::FAIL,	165 OnStringConversionError::FAIL,

166 true,	166 true,

167 L"\x4F60\x597D",	167 L"\x4F60\x597D",

168 NULL},	168 NULL},

169 // Greek (ISO-8859)	169 // Greek (ISO-8859)

(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
227 // Thai (windows-874)	227 // Thai (windows-874)

228 {"windows-874",	228 {"windows-874",

229 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA",	229 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA",

230 OnStringConversionError::FAIL,	230 OnStringConversionError::FAIL,

231 true,	231 true,

232 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35"	232 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35"

233 L"\x0E04\x0E23\x0e31\x0E1A",	233 L"\x0E04\x0E23\x0e31\x0E1A",

234 NULL},	234 NULL},

235 };	235 };

236	236

237 TEST(StringUtilTest, ConvertBetweenCodepageAndWide) {	237 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndWide) {

238 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {	238 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {

239 std::wstring wide;	239 std::wstring wide;

240 bool success = CodepageToWide(kConvertCodepageCases[i].encoded,	240 bool success = CodepageToWide(kConvertCodepageCases[i].encoded,

241 kConvertCodepageCases[i].codepage_name,	241 kConvertCodepageCases[i].codepage_name,

242 kConvertCodepageCases[i].on_error,	242 kConvertCodepageCases[i].on_error,

243 &wide);	243 &wide);

244 EXPECT_EQ(kConvertCodepageCases[i].success, success);	244 EXPECT_EQ(kConvertCodepageCases[i].success, success);

245 EXPECT_EQ(kConvertCodepageCases[i].wide, wide);	245 EXPECT_EQ(kConvertCodepageCases[i].wide, wide);

246	246

247 // When decoding was successful and nothing was skipped, we also check the	247 // When decoding was successful and nothing was skipped, we also check the

(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
289 // Invalid characters should fail.	289 // Invalid characters should fail.

290 EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1",	290 EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1",

291 OnStringConversionError::SKIP, &encoded));	291 OnStringConversionError::SKIP, &encoded));

292 EXPECT_STREQ("az", encoded.c_str());	292 EXPECT_STREQ("az", encoded.c_str());

293	293

294 // Invalid codepages should fail.	294 // Invalid codepages should fail.

295 EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2",	295 EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2",

296 OnStringConversionError::SKIP, &encoded));	296 OnStringConversionError::SKIP, &encoded));

297 }	297 }

298	298

299 TEST(StringUtilTest, ConvertBetweenCodepageAndUTF16) {	299 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) {

300 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {	300 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {

301 string16 utf16;	301 string16 utf16;

302 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded,	302 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded,

303 kConvertCodepageCases[i].codepage_name,	303 kConvertCodepageCases[i].codepage_name,

304 kConvertCodepageCases[i].on_error,	304 kConvertCodepageCases[i].on_error,

305 &utf16);	305 &utf16);

306 string16 utf16_expected;	306 string16 utf16_expected;

307 if (kConvertCodepageCases[i].u16_wide == NULL)	307 if (kConvertCodepageCases[i].u16_wide == NULL)

308 utf16_expected = BuildString16(kConvertCodepageCases[i].wide);	308 utf16_expected = BuildString16(kConvertCodepageCases[i].wide);

309 else	309 else

310 utf16_expected = BuildString16(kConvertCodepageCases[i].u16_wide);	310 utf16_expected = BuildString16(kConvertCodepageCases[i].u16_wide);

311 EXPECT_EQ(kConvertCodepageCases[i].success, success);	311 EXPECT_EQ(kConvertCodepageCases[i].success, success);

312 EXPECT_EQ(utf16_expected, utf16);	312 EXPECT_EQ(utf16_expected, utf16);

313	313

314 // When decoding was successful and nothing was skipped, we also check the	314 // When decoding was successful and nothing was skipped, we also check the

315 // reverse conversion. See also the corresponding comment in	315 // reverse conversion. See also the corresponding comment in

316 // ConvertBetweenCodepageAndWide.	316 // ConvertBetweenCodepageAndWide.

317 if (success &&	317 if (success &&

318 kConvertCodepageCases[i].on_error == OnStringConversionError::FAIL) {	318 kConvertCodepageCases[i].on_error == OnStringConversionError::FAIL) {

319 std::string encoded;	319 std::string encoded;

320 success = UTF16ToCodepage(utf16, kConvertCodepageCases[i].codepage_name,	320 success = UTF16ToCodepage(utf16, kConvertCodepageCases[i].codepage_name,

321 kConvertCodepageCases[i].on_error, &encoded);	321 kConvertCodepageCases[i].on_error, &encoded);

322 EXPECT_EQ(kConvertCodepageCases[i].success, success);	322 EXPECT_EQ(kConvertCodepageCases[i].success, success);

323 EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded);	323 EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded);

324 }	324 }

325 }	325 }

326 }	326 }

327	327

	328 static const struct {

	329 const char* codepage_name;

	330 const char* encoded;

	331 size_t input_offset;

	332 size_t u16_output_offset;

	333 size_t wide_output_offset;

	334 } kAdjustOffsetCases[] = {

	335 {"gb2312", "", 0, string16::npos, std::wstring::npos},

	336 {"gb2312", "\xC4\xE3\xBA\xC3", 0, 0, 0},

	337 {"gb2312", "\xC4\xE3\xBA\xC3", 2, 1, 1},

	338 {"gb2312", "\xC4\xE3\xBA\xC3", 4, string16::npos, std::wstring::npos},

	339 {"gb2312", "\xC4\xE3\xBA\xC3", 1, string16::npos, std::wstring::npos},

	340 {"gb2312", "\xC4\xE3\xBA\xC3", std::string::npos, string16::npos,

	341 std::wstring::npos},

	342 {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 2, string16::npos,

	343 std::wstring::npos},

	344 {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 4, 2, 1},

	345 };

	346

	347 TEST(ICUStringConversionsTest, AdjustOffset) {

	348 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kAdjustOffsetCases); ++i) {

	349 string16 utf16;

	350 size_t offset = kAdjustOffsetCases[i].input_offset;

	351 EXPECT_TRUE(CodepageToUTF16AndAdjustOffset(kAdjustOffsetCases[i].encoded,

	352 kAdjustOffsetCases[i].codepage_name,

	353 OnStringConversionError::FAIL, &utf16, &offset));

	354 EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);

	355

	356 std::wstring wide;

	357 offset = kAdjustOffsetCases[i].input_offset;

	358 CodepageToWideAndAdjustOffset(kAdjustOffsetCases[i].encoded,

	359 kAdjustOffsetCases[i].codepage_name,

	360 OnStringConversionError::FAIL, &wide, &offset);

	361 #if defined(WCHAR_T_IS_UTF16)

	362 EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);

	363 #elif defined(WCHAR_T_IS_UTF32)

	364 EXPECT_EQ(kAdjustOffsetCases[i].wide_output_offset, offset);

	365 #endif

	366 }

	367 }

	368

328 } // namespace base	369 } // namespace base

OLD	NEW

« no previous file with comments | « base/i18n/icu_string_conversions.cc ('k') | base/string_util_unittest.cc » ('j') | no next file with comments »