Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(183)

Side by Side Diff: base/i18n/icu_string_conversions_unittest.cc

Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « base/i18n/icu_string_conversions.cc ('k') | base/string_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <math.h> 5 #include <math.h>
6 #include <stdarg.h> 6 #include <stdarg.h>
7 7
8 #include <limits> 8 #include <limits>
9 #include <sstream> 9 #include <sstream>
10 10
11 #include "base/basictypes.h" 11 #include "base/basictypes.h"
12 #include "base/i18n/icu_string_conversions.h"
12 #include "base/logging.h" 13 #include "base/logging.h"
13 #include "base/utf_string_conversions.h" 14 #include "base/utf_string_conversions.h"
14 #include "base/i18n/icu_string_conversions.h"
15 #include "testing/gtest/include/gtest/gtest.h" 15 #include "testing/gtest/include/gtest/gtest.h"
16 16
17 namespace base { 17 namespace base {
18 18
19 namespace { 19 namespace {
20 20
21 // Given a null-terminated string of wchar_t with each wchar_t representing 21 // Given a null-terminated string of wchar_t with each wchar_t representing
22 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input. 22 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
23 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF) 23 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
24 // should be represented as a surrogate pair (two UTF-16 units) 24 // should be represented as a surrogate pair (two UTF-16 units)
25 // *even* where wchar_t is 32-bit (Linux and Mac). 25 // *even* where wchar_t is 32-bit (Linux and Mac).
26 // 26 //
27 // This is to help write tests for functions with string16 params until 27 // This is to help write tests for functions with string16 params until
28 // the C++ 0x UTF-16 literal is well-supported by compilers. 28 // the C++ 0x UTF-16 literal is well-supported by compilers.
29 string16 BuildString16(const wchar_t* s) { 29 string16 BuildString16(const wchar_t* s) {
30 #if defined(WCHAR_T_IS_UTF16) 30 #if defined(WCHAR_T_IS_UTF16)
31 return string16(s); 31 return string16(s);
32 #elif defined(WCHAR_T_IS_UTF32) 32 #elif defined(WCHAR_T_IS_UTF32)
33 string16 u16; 33 string16 u16;
34 while (*s != 0) { 34 while (*s != 0) {
35 DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu); 35 DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu);
36 u16.push_back(*s++); 36 u16.push_back(*s++);
37 } 37 }
38 return u16; 38 return u16;
39 #endif 39 #endif
40 } 40 }
41 41
42 static const wchar_t* const kConvertRoundtripCases[] = { 42 const wchar_t* const kConvertRoundtripCases[] = {
43 L"Google Video", 43 L"Google Video",
44 // "网页 图片 资讯更多 »" 44 // "网页 图片 资讯更多 »"
45 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb", 45 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
46 // "Παγκόσμιος Ιστός" 46 // "Παγκόσμιος Ιστός"
47 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" 47 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
48 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2", 48 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",
49 // "Поиск страниц на русском" 49 // "Поиск страниц на русском"
50 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442" 50 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
51 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430" 51 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
52 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c", 52 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",
53 // "전체서비스" 53 // "전체서비스"
54 L"\xc804\xccb4\xc11c\xbe44\xc2a4", 54 L"\xc804\xccb4\xc11c\xbe44\xc2a4",
55 55
56 // Test characters that take more than 16 bits. This will depend on whether 56 // Test characters that take more than 16 bits. This will depend on whether
57 // wchar_t is 16 or 32 bits. 57 // wchar_t is 16 or 32 bits.
58 #if defined(WCHAR_T_IS_UTF16) 58 #if defined(WCHAR_T_IS_UTF16)
59 L"\xd800\xdf00", 59 L"\xd800\xdf00",
60 // Five non-BMP characters (U+11D40 - U+11D44) as UTF-16 surrogate pairs. 60 // Five non-BMP characters (U+11D40 - U+11D44) as UTF-16 surrogate pairs.
61 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44", 61 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
62 #elif defined(WCHAR_T_IS_UTF32) 62 #elif defined(WCHAR_T_IS_UTF32)
63 L"\x10300", 63 L"\x10300",
64 // Five non-BMP characters (U+11D40 - U+11D44) as single UTF-32 code units. 64 // Five non-BMP characters (U+11D40 - U+11D44) as single UTF-32 code units.
65 L"\x11d40\x11d41\x11d42\x11d43\x11d44", 65 L"\x11d40\x11d41\x11d42\x11d43\x11d44",
66 #endif 66 #endif
67 }; 67 };
68 68
69 } // namespace 69 } // namespace
70 70
71 TEST(StringUtilTest, ConvertCodepageUTF8) { 71 TEST(ICUStringConversionsTest, ConvertCodepageUTF8) {
72 // Make sure WideToCodepage works like WideToUTF8. 72 // Make sure WideToCodepage works like WideToUTF8.
73 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { 73 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
74 std::string expected(WideToUTF8(kConvertRoundtripCases[i])); 74 std::string expected(WideToUTF8(kConvertRoundtripCases[i]));
75 std::string utf8; 75 std::string utf8;
76 EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8, 76 EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8,
77 OnStringConversionError::SKIP, &utf8)); 77 OnStringConversionError::SKIP, &utf8));
78 EXPECT_EQ(expected, utf8); 78 EXPECT_EQ(expected, utf8);
79 } 79 }
80 } 80 }
81 81
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
149 true, 149 true,
150 L"\x00A5\x00A8", 150 L"\x00A5\x00A8",
151 NULL}, 151 NULL},
152 // Chinese (GB18030) : A 4 byte sequence mapped to plane 2 (U+20000) 152 // Chinese (GB18030) : A 4 byte sequence mapped to plane 2 (U+20000)
153 {"gb18030", 153 {"gb18030",
154 "\x95\x32\x82\x36\xD2\xBB", 154 "\x95\x32\x82\x36\xD2\xBB",
155 OnStringConversionError::FAIL, 155 OnStringConversionError::FAIL,
156 true, 156 true,
157 #if defined(WCHAR_T_IS_UTF16) 157 #if defined(WCHAR_T_IS_UTF16)
158 L"\xD840\xDC00\x4E00", 158 L"\xD840\xDC00\x4E00",
159 #else 159 #elif defined(WCHAR_T_IS_UTF32)
160 L"\x20000\x4E00", 160 L"\x20000\x4E00",
161 #endif 161 #endif
162 L"\xD840\xDC00\x4E00"}, 162 L"\xD840\xDC00\x4E00"},
163 {"big5", 163 {"big5",
164 "\xA7\x41\xA6\x6E", 164 "\xA7\x41\xA6\x6E",
165 OnStringConversionError::FAIL, 165 OnStringConversionError::FAIL,
166 true, 166 true,
167 L"\x4F60\x597D", 167 L"\x4F60\x597D",
168 NULL}, 168 NULL},
169 // Greek (ISO-8859) 169 // Greek (ISO-8859)
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
227 // Thai (windows-874) 227 // Thai (windows-874)
228 {"windows-874", 228 {"windows-874",
229 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA", 229 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA",
230 OnStringConversionError::FAIL, 230 OnStringConversionError::FAIL,
231 true, 231 true,
232 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35" 232 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35"
233 L"\x0E04\x0E23\x0e31\x0E1A", 233 L"\x0E04\x0E23\x0e31\x0E1A",
234 NULL}, 234 NULL},
235 }; 235 };
236 236
237 TEST(StringUtilTest, ConvertBetweenCodepageAndWide) { 237 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndWide) {
238 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) { 238 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {
239 std::wstring wide; 239 std::wstring wide;
240 bool success = CodepageToWide(kConvertCodepageCases[i].encoded, 240 bool success = CodepageToWide(kConvertCodepageCases[i].encoded,
241 kConvertCodepageCases[i].codepage_name, 241 kConvertCodepageCases[i].codepage_name,
242 kConvertCodepageCases[i].on_error, 242 kConvertCodepageCases[i].on_error,
243 &wide); 243 &wide);
244 EXPECT_EQ(kConvertCodepageCases[i].success, success); 244 EXPECT_EQ(kConvertCodepageCases[i].success, success);
245 EXPECT_EQ(kConvertCodepageCases[i].wide, wide); 245 EXPECT_EQ(kConvertCodepageCases[i].wide, wide);
246 246
247 // When decoding was successful and nothing was skipped, we also check the 247 // When decoding was successful and nothing was skipped, we also check the
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
289 // Invalid characters are dropped under SKIP, so the conversion succeeds. 289 // Invalid characters are dropped under SKIP, so the conversion succeeds.
290 EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1", 290 EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1",
291 OnStringConversionError::SKIP, &encoded)); 291 OnStringConversionError::SKIP, &encoded));
292 EXPECT_STREQ("az", encoded.c_str()); 292 EXPECT_STREQ("az", encoded.c_str());
293 293
294 // Invalid codepages should fail. 294 // Invalid codepages should fail.
295 EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2", 295 EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2",
296 OnStringConversionError::SKIP, &encoded)); 296 OnStringConversionError::SKIP, &encoded));
297 } 297 }
298 298
299 TEST(StringUtilTest, ConvertBetweenCodepageAndUTF16) { 299 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) {
300 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) { 300 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {
301 string16 utf16; 301 string16 utf16;
302 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded, 302 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded,
303 kConvertCodepageCases[i].codepage_name, 303 kConvertCodepageCases[i].codepage_name,
304 kConvertCodepageCases[i].on_error, 304 kConvertCodepageCases[i].on_error,
305 &utf16); 305 &utf16);
306 string16 utf16_expected; 306 string16 utf16_expected;
307 if (kConvertCodepageCases[i].u16_wide == NULL) 307 if (kConvertCodepageCases[i].u16_wide == NULL)
308 utf16_expected = BuildString16(kConvertCodepageCases[i].wide); 308 utf16_expected = BuildString16(kConvertCodepageCases[i].wide);
309 else 309 else
310 utf16_expected = BuildString16(kConvertCodepageCases[i].u16_wide); 310 utf16_expected = BuildString16(kConvertCodepageCases[i].u16_wide);
311 EXPECT_EQ(kConvertCodepageCases[i].success, success); 311 EXPECT_EQ(kConvertCodepageCases[i].success, success);
312 EXPECT_EQ(utf16_expected, utf16); 312 EXPECT_EQ(utf16_expected, utf16);
313 313
314 // When decoding was successful and nothing was skipped, we also check the 314 // When decoding was successful and nothing was skipped, we also check the
315 // reverse conversion. See also the corresponding comment in 315 // reverse conversion. See also the corresponding comment in
316 // ConvertBetweenCodepageAndWide. 316 // ConvertBetweenCodepageAndWide.
317 if (success && 317 if (success &&
318 kConvertCodepageCases[i].on_error == OnStringConversionError::FAIL) { 318 kConvertCodepageCases[i].on_error == OnStringConversionError::FAIL) {
319 std::string encoded; 319 std::string encoded;
320 success = UTF16ToCodepage(utf16, kConvertCodepageCases[i].codepage_name, 320 success = UTF16ToCodepage(utf16, kConvertCodepageCases[i].codepage_name,
321 kConvertCodepageCases[i].on_error, &encoded); 321 kConvertCodepageCases[i].on_error, &encoded);
322 EXPECT_EQ(kConvertCodepageCases[i].success, success); 322 EXPECT_EQ(kConvertCodepageCases[i].success, success);
323 EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded); 323 EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded);
324 } 324 }
325 } 325 }
326 } 326 }
327 327
328 static const struct {
329 const char* codepage_name;
330 const char* encoded;
331 size_t input_offset;
332 size_t u16_output_offset;
333 size_t wide_output_offset;
334 } kAdjustOffsetCases[] = {
335 {"gb2312", "", 0, string16::npos, std::wstring::npos},
336 {"gb2312", "\xC4\xE3\xBA\xC3", 0, 0, 0},
337 {"gb2312", "\xC4\xE3\xBA\xC3", 2, 1, 1},
338 {"gb2312", "\xC4\xE3\xBA\xC3", 4, string16::npos, std::wstring::npos},
339 {"gb2312", "\xC4\xE3\xBA\xC3", 1, string16::npos, std::wstring::npos},
340 {"gb2312", "\xC4\xE3\xBA\xC3", std::string::npos, string16::npos,
341 std::wstring::npos},
342 {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 2, string16::npos,
343 std::wstring::npos},
344 {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 4, 2, 1},
345 };
346
347 TEST(ICUStringConversionsTest, AdjustOffset) {
348 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kAdjustOffsetCases); ++i) {
349 string16 utf16;
350 size_t offset = kAdjustOffsetCases[i].input_offset;
351 EXPECT_TRUE(CodepageToUTF16AndAdjustOffset(kAdjustOffsetCases[i].encoded,
352 kAdjustOffsetCases[i].codepage_name,
353 OnStringConversionError::FAIL, &utf16, &offset));
354 EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);
355
356 std::wstring wide;
357 offset = kAdjustOffsetCases[i].input_offset;
358 CodepageToWideAndAdjustOffset(kAdjustOffsetCases[i].encoded,
359 kAdjustOffsetCases[i].codepage_name,
360 OnStringConversionError::FAIL, &wide, &offset);
361 #if defined(WCHAR_T_IS_UTF16)
362 EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);
363 #elif defined(WCHAR_T_IS_UTF32)
364 EXPECT_EQ(kAdjustOffsetCases[i].wide_output_offset, offset);
365 #endif
366 }
367 }
368
328 } // namespace base 369 } // namespace base
OLDNEW
« no previous file with comments | « base/i18n/icu_string_conversions.cc ('k') | base/string_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698