| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include <string> | |
| 6 | |
| 7 #include "base/i18n/rtl.h" | |
| 8 #include "base/i18n/string_search.h" | |
| 9 #include "base/strings/string16.h" | |
| 10 #include "base/strings/utf_string_conversions.h" | |
| 11 #include "testing/gtest/include/gtest/gtest.h" | |
| 12 #include "third_party/icu/source/i18n/unicode/usearch.h" | |
| 13 | |
| 14 namespace base { | |
| 15 namespace i18n { | |
| 16 | |
| 17 // Note on setting default locale for testing: The current default locale on | |
| 18 // the Mac trybot is en_US_POSIX, with which primary-level collation strength | |
| 19 // string search is case-sensitive, when normally it should be | |
| 20 // case-insensitive. In other locales (including en_US which English speakers | |
| 21 // in the U.S. use), this search would be case-insensitive as expected. | |
| 22 | |
| 23 TEST(StringSearchTest, ASCII) { | |
| 24 std::string default_locale(uloc_getDefault()); | |
| 25 bool locale_is_posix = (default_locale == "en_US_POSIX"); | |
| 26 if (locale_is_posix) | |
| 27 SetICUDefaultLocale("en_US"); | |
| 28 | |
| 29 size_t index = 0; | |
| 30 size_t length = 0; | |
| 31 | |
| 32 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 33 ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length)); | |
| 34 EXPECT_EQ(0U, index); | |
| 35 EXPECT_EQ(5U, length); | |
| 36 | |
| 37 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( | |
| 38 ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"), | |
| 39 &index, &length)); | |
| 40 | |
| 41 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 42 ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length)); | |
| 43 EXPECT_EQ(4U, index); | |
| 44 EXPECT_EQ(6U, length); | |
| 45 | |
| 46 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( | |
| 47 ASCIIToUTF16("searching within empty string"), string16(), | |
| 48 &index, &length)); | |
| 49 | |
| 50 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 51 string16(), ASCIIToUTF16("searching for empty string"), &index, &length)); | |
| 52 EXPECT_EQ(0U, index); | |
| 53 EXPECT_EQ(0U, length); | |
| 54 | |
| 55 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 56 ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"), | |
| 57 &index, &length)); | |
| 58 EXPECT_EQ(0U, index); | |
| 59 EXPECT_EQ(18U, length); | |
| 60 | |
| 61 if (locale_is_posix) | |
| 62 SetICUDefaultLocale(default_locale.data()); | |
| 63 } | |
| 64 | |
| 65 TEST(StringSearchTest, UnicodeLocaleIndependent) { | |
| 66 // Base characters | |
| 67 const string16 e_base = WideToUTF16(L"e"); | |
| 68 const string16 E_base = WideToUTF16(L"E"); | |
| 69 const string16 a_base = WideToUTF16(L"a"); | |
| 70 | |
| 71 // Composed characters | |
| 72 const string16 e_with_acute_accent = WideToUTF16(L"\u00e9"); | |
| 73 const string16 E_with_acute_accent = WideToUTF16(L"\u00c9"); | |
| 74 const string16 e_with_grave_accent = WideToUTF16(L"\u00e8"); | |
| 75 const string16 E_with_grave_accent = WideToUTF16(L"\u00c8"); | |
| 76 const string16 a_with_acute_accent = WideToUTF16(L"\u00e1"); | |
| 77 | |
| 78 // Decomposed characters | |
| 79 const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301"); | |
| 80 const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301"); | |
| 81 const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300"); | |
| 82 const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300"); | |
| 83 const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301"); | |
| 84 | |
| 85 std::string default_locale(uloc_getDefault()); | |
| 86 bool locale_is_posix = (default_locale == "en_US_POSIX"); | |
| 87 if (locale_is_posix) | |
| 88 SetICUDefaultLocale("en_US"); | |
| 89 | |
| 90 size_t index = 0; | |
| 91 size_t length = 0; | |
| 92 | |
| 93 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 94 e_base, e_with_acute_accent, &index, &length)); | |
| 95 EXPECT_EQ(0U, index); | |
| 96 EXPECT_EQ(e_with_acute_accent.size(), length); | |
| 97 | |
| 98 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 99 e_with_acute_accent, e_base, &index, &length)); | |
| 100 EXPECT_EQ(0U, index); | |
| 101 EXPECT_EQ(e_base.size(), length); | |
| 102 | |
| 103 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 104 e_base, e_with_acute_combining_mark, &index, &length)); | |
| 105 EXPECT_EQ(0U, index); | |
| 106 EXPECT_EQ(e_with_acute_combining_mark.size(), length); | |
| 107 | |
| 108 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 109 e_with_acute_combining_mark, e_base, &index, &length)); | |
| 110 EXPECT_EQ(0U, index); | |
| 111 EXPECT_EQ(e_base.size(), length); | |
| 112 | |
| 113 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 114 e_with_acute_combining_mark, e_with_acute_accent, | |
| 115 &index, &length)); | |
| 116 EXPECT_EQ(0U, index); | |
| 117 EXPECT_EQ(e_with_acute_accent.size(), length); | |
| 118 | |
| 119 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 120 e_with_acute_accent, e_with_acute_combining_mark, | |
| 121 &index, &length)); | |
| 122 EXPECT_EQ(0U, index); | |
| 123 EXPECT_EQ(e_with_acute_combining_mark.size(), length); | |
| 124 | |
| 125 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 126 e_with_acute_combining_mark, e_with_grave_combining_mark, | |
| 127 &index, &length)); | |
| 128 EXPECT_EQ(0U, index); | |
| 129 EXPECT_EQ(e_with_grave_combining_mark.size(), length); | |
| 130 | |
| 131 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 132 e_with_grave_combining_mark, e_with_acute_combining_mark, | |
| 133 &index, &length)); | |
| 134 EXPECT_EQ(0U, index); | |
| 135 EXPECT_EQ(e_with_acute_combining_mark.size(), length); | |
| 136 | |
| 137 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 138 e_with_acute_combining_mark, e_with_grave_accent, &index, &length)); | |
| 139 EXPECT_EQ(0U, index); | |
| 140 EXPECT_EQ(e_with_grave_accent.size(), length); | |
| 141 | |
| 142 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 143 e_with_grave_accent, e_with_acute_combining_mark, &index, &length)); | |
| 144 EXPECT_EQ(0U, index); | |
| 145 EXPECT_EQ(e_with_acute_combining_mark.size(), length); | |
| 146 | |
| 147 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 148 E_with_acute_accent, e_with_acute_accent, &index, &length)); | |
| 149 EXPECT_EQ(0U, index); | |
| 150 EXPECT_EQ(e_with_acute_accent.size(), length); | |
| 151 | |
| 152 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 153 E_with_grave_accent, e_with_acute_accent, &index, &length)); | |
| 154 EXPECT_EQ(0U, index); | |
| 155 EXPECT_EQ(e_with_acute_accent.size(), length); | |
| 156 | |
| 157 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 158 E_with_acute_combining_mark, e_with_grave_accent, &index, &length)); | |
| 159 EXPECT_EQ(0U, index); | |
| 160 EXPECT_EQ(e_with_grave_accent.size(), length); | |
| 161 | |
| 162 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 163 E_with_grave_combining_mark, e_with_acute_accent, &index, &length)); | |
| 164 EXPECT_EQ(0U, index); | |
| 165 EXPECT_EQ(e_with_acute_accent.size(), length); | |
| 166 | |
| 167 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 168 E_base, e_with_grave_accent, &index, &length)); | |
| 169 EXPECT_EQ(0U, index); | |
| 170 EXPECT_EQ(e_with_grave_accent.size(), length); | |
| 171 | |
| 172 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( | |
| 173 a_with_acute_accent, e_with_acute_accent, &index, &length)); | |
| 174 | |
| 175 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( | |
| 176 a_with_acute_combining_mark, e_with_acute_combining_mark, | |
| 177 &index, &length)); | |
| 178 | |
| 179 if (locale_is_posix) | |
| 180 SetICUDefaultLocale(default_locale.data()); | |
| 181 } | |
| 182 | |
| 183 TEST(StringSearchTest, UnicodeLocaleDependent) { | |
| 184 // Base characters | |
| 185 const string16 a_base = WideToUTF16(L"a"); | |
| 186 | |
| 187 // Composed characters | |
| 188 const string16 a_with_ring = WideToUTF16(L"\u00e5"); | |
| 189 | |
| 190 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | |
| 191 a_base, a_with_ring, NULL, NULL)); | |
| 192 | |
| 193 const char* default_locale = uloc_getDefault(); | |
| 194 SetICUDefaultLocale("da"); | |
| 195 | |
| 196 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( | |
| 197 a_base, a_with_ring, NULL, NULL)); | |
| 198 | |
| 199 SetICUDefaultLocale(default_locale); | |
| 200 } | |
| 201 | |
| 202 TEST(StringSearchTest, FixedPatternMultipleSearch) { | |
| 203 std::string default_locale(uloc_getDefault()); | |
| 204 bool locale_is_posix = (default_locale == "en_US_POSIX"); | |
| 205 if (locale_is_posix) | |
| 206 SetICUDefaultLocale("en_US"); | |
| 207 | |
| 208 size_t index = 0; | |
| 209 size_t length = 0; | |
| 210 | |
| 211 // Search "hello" over multiple texts. | |
| 212 FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello")); | |
| 213 EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length)); | |
| 214 EXPECT_EQ(2U, index); | |
| 215 EXPECT_EQ(5U, length); | |
| 216 EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length)); | |
| 217 EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length)); | |
| 218 EXPECT_EQ(0U, index); | |
| 219 EXPECT_EQ(5U, length); | |
| 220 | |
| 221 if (locale_is_posix) | |
| 222 SetICUDefaultLocale(default_locale.data()); | |
| 223 } | |
| 224 | |
| 225 } // namespace i18n | |
| 226 } // namespace base | |
| OLD | NEW |