OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/i18n/case_conversion.h" | 5 #include "base/i18n/case_conversion.h" |
6 #include "base/i18n/rtl.h" | 6 #include "base/i18n/rtl.h" |
7 #include "base/strings/utf_string_conversions.h" | 7 #include "base/strings/utf_string_conversions.h" |
8 #include "testing/gtest/include/gtest/gtest.h" | 8 #include "testing/gtest/include/gtest/gtest.h" |
9 #include "third_party/icu/source/i18n/unicode/usearch.h" | 9 #include "third_party/icu/source/i18n/unicode/usearch.h" |
10 | 10 |
11 namespace base { | 11 namespace base { |
| 12 namespace i18n { |
| 13 |
12 namespace { | 14 namespace { |
13 | 15 |
| 16 const wchar_t kNonASCIIMixed[] = |
| 17 L"\xC4\xD6\xE4\xF6\x20\xCF\xEF\x20\xF7\x25" |
| 18 L"\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F07\x1F0F" |
| 19 L"\x20\x1E00\x1E01"; |
| 20 const wchar_t kNonASCIILower[] = |
| 21 L"\xE4\xF6\xE4\xF6\x20\xEF\xEF" |
| 22 L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F07" |
| 23 L"\x1F07\x20\x1E01\x1E01"; |
| 24 const wchar_t kNonASCIIUpper[] = |
| 25 L"\xC4\xD6\xC4\xD6\x20\xCF\xCF" |
| 26 L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F0F" |
| 27 L"\x1F0F\x20\x1E00\x1E00"; |
| 28 |
| 29 } // namespace |
| 30 |
14 // Test upper and lower case string conversion. | 31 // Test upper and lower case string conversion. |
15 TEST(CaseConversionTest, UpperLower) { | 32 TEST(CaseConversionTest, UpperLower) { |
16 const string16 mixed(ASCIIToUTF16("Text with UPPer & lowER casE.")); | 33 const string16 mixed(ASCIIToUTF16("Text with UPPer & lowER casE.")); |
17 const string16 expected_lower(ASCIIToUTF16("text with upper & lower case.")); | 34 const string16 expected_lower(ASCIIToUTF16("text with upper & lower case.")); |
18 const string16 expected_upper(ASCIIToUTF16("TEXT WITH UPPER & LOWER CASE.")); | 35 const string16 expected_upper(ASCIIToUTF16("TEXT WITH UPPER & LOWER CASE.")); |
19 | 36 |
20 string16 result = base::i18n::ToLower(mixed); | 37 string16 result = ToLower(mixed); |
21 EXPECT_EQ(expected_lower, result); | 38 EXPECT_EQ(expected_lower, result); |
22 | 39 |
23 result = base::i18n::ToUpper(mixed); | 40 result = ToUpper(mixed); |
24 EXPECT_EQ(expected_upper, result); | 41 EXPECT_EQ(expected_upper, result); |
25 } | 42 } |
26 | 43 |
27 TEST(CaseConversionTest, NonASCII) { | 44 TEST(CaseConversionTest, NonASCII) { |
28 const string16 mixed(WideToUTF16( | 45 const string16 mixed(WideToUTF16(kNonASCIIMixed)); |
29 L"\xC4\xD6\xE4\xF6\x20\xCF\xEF\x20\xF7\x25" | 46 const string16 expected_lower(WideToUTF16(kNonASCIILower)); |
30 L"\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F07\x1F0F" | 47 const string16 expected_upper(WideToUTF16(kNonASCIIUpper)); |
31 L"\x20\x1E00\x1E01")); | |
32 const string16 expected_lower(WideToUTF16( | |
33 L"\xE4\xF6\xE4\xF6\x20\xEF\xEF" | |
34 L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F07" | |
35 L"\x1F07\x20\x1E01\x1E01")); | |
36 const string16 expected_upper(WideToUTF16( | |
37 L"\xC4\xD6\xC4\xD6\x20\xCF\xCF" | |
38 L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F0F" | |
39 L"\x1F0F\x20\x1E00\x1E00")); | |
40 | 48 |
41 string16 result = base::i18n::ToLower(mixed); | 49 string16 result = ToLower(mixed); |
42 EXPECT_EQ(expected_lower, result); | 50 EXPECT_EQ(expected_lower, result); |
43 | 51 |
44 result = base::i18n::ToUpper(mixed); | 52 result = ToUpper(mixed); |
45 EXPECT_EQ(expected_upper, result); | 53 EXPECT_EQ(expected_upper, result); |
46 } | 54 } |
47 | 55 |
48 TEST(CaseConversionTest, TurkishLocaleConversion) { | 56 TEST(CaseConversionTest, TurkishLocaleConversion) { |
49 const string16 mixed(WideToUTF16(L"\x49\x131")); | 57 const string16 mixed(WideToUTF16(L"\x49\x131")); |
50 const string16 expected_lower(WideToUTF16(L"\x69\x131")); | 58 const string16 expected_lower(WideToUTF16(L"\x69\x131")); |
51 const string16 expected_upper(WideToUTF16(L"\x49\x49")); | 59 const string16 expected_upper(WideToUTF16(L"\x49\x49")); |
52 | 60 |
53 std::string default_locale(uloc_getDefault()); | 61 std::string default_locale(uloc_getDefault()); |
54 i18n::SetICUDefaultLocale("en_US"); | 62 i18n::SetICUDefaultLocale("en_US"); |
55 | 63 |
56 string16 result = base::i18n::ToLower(mixed); | 64 string16 result = ToLower(mixed); |
57 EXPECT_EQ(expected_lower, result); | 65 EXPECT_EQ(expected_lower, result); |
58 | 66 |
59 result = base::i18n::ToUpper(mixed); | 67 result = ToUpper(mixed); |
60 EXPECT_EQ(expected_upper, result); | 68 EXPECT_EQ(expected_upper, result); |
61 | 69 |
62 i18n::SetICUDefaultLocale("tr"); | 70 i18n::SetICUDefaultLocale("tr"); |
63 | 71 |
64 const string16 expected_lower_turkish(WideToUTF16(L"\x131\x131")); | 72 const string16 expected_lower_turkish(WideToUTF16(L"\x131\x131")); |
65 const string16 expected_upper_turkish(WideToUTF16(L"\x49\x49")); | 73 const string16 expected_upper_turkish(WideToUTF16(L"\x49\x49")); |
66 | 74 |
67 result = base::i18n::ToLower(mixed); | 75 result = ToLower(mixed); |
68 EXPECT_EQ(expected_lower_turkish, result); | 76 EXPECT_EQ(expected_lower_turkish, result); |
69 | 77 |
70 result = base::i18n::ToUpper(mixed); | 78 result = ToUpper(mixed); |
71 EXPECT_EQ(expected_upper_turkish, result); | 79 EXPECT_EQ(expected_upper_turkish, result); |
72 | 80 |
73 base::i18n::SetICUDefaultLocale(default_locale.data()); | 81 SetICUDefaultLocale(default_locale.data()); |
74 } | 82 } |
75 | 83 |
76 } // namespace | 84 TEST(CaseConversionTest, FoldCase) { |
| 85 // Simple ASCII, should lower-case. |
| 86 EXPECT_EQ(ASCIIToUTF16("hello, world"), |
| 87 FoldCase(ASCIIToUTF16("Hello, World"))); |
| 88 |
| 89 // Non-ASCII cases from above. They should all fold to the same result. |
| 90 EXPECT_EQ(FoldCase(WideToUTF16(kNonASCIIMixed)), |
| 91 FoldCase(WideToUTF16(kNonASCIILower))); |
| 92 EXPECT_EQ(FoldCase(WideToUTF16(kNonASCIIMixed)), |
| 93 FoldCase(WideToUTF16(kNonASCIIUpper))); |
| 94 |
| 95 // Turkish cases from above. This is the lower-case expected result from the |
| 96 // US locale. It should be the same even when the current locale is Turkish. |
| 97 const string16 turkish(WideToUTF16(L"\x49\x131")); |
| 98 const string16 turkish_expected(WideToUTF16(L"\x69\x131")); |
| 99 |
| 100 std::string default_locale(uloc_getDefault()); |
| 101 i18n::SetICUDefaultLocale("en_US"); |
| 102 EXPECT_EQ(turkish_expected, FoldCase(turkish)); |
| 103 |
| 104 i18n::SetICUDefaultLocale("tr"); |
| 105 EXPECT_EQ(turkish_expected, FoldCase(turkish)); |
| 106 |
| 107 // Test a case that gets bigger when processed. |
| 108 // U+130 = LATIN CAPITAL LETTER I WITH DOT ABOVE gets folded to a lower case |
| 109 // "i" followed by U+307 COMBINING DOT ABOVE. |
| 110 EXPECT_EQ(WideToUTF16(L"i\u0307j"), FoldCase(WideToUTF16(L"\u0130j"))); |
| 111 |
| 112 // U+00DF (SHARP S) and U+1E9E (CAPITAL SHARP S) are both folded to "ss". |
| 113 EXPECT_EQ(ASCIIToUTF16("ssss"), FoldCase(WideToUTF16(L"\u00DF\u1E9E"))); |
| 114 } |
| 115 |
| 116 } // namespace i18n |
77 } // namespace base | 117 } // namespace base |
78 | 118 |
79 | 119 |
80 | 120 |
OLD | NEW |