| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/i18n/break_iterator.h" | |
| 6 | |
| 7 #include "base/strings/string_piece.h" | |
| 8 #include "base/strings/stringprintf.h" | |
| 9 #include "base/strings/utf_string_conversions.h" | |
| 10 #include "testing/gtest/include/gtest/gtest.h" | |
| 11 | |
| 12 namespace base { | |
| 13 namespace i18n { | |
| 14 | |
| 15 TEST(BreakIteratorTest, BreakWordEmpty) { | |
| 16 string16 empty; | |
| 17 BreakIterator iter(empty, BreakIterator::BREAK_WORD); | |
| 18 ASSERT_TRUE(iter.Init()); | |
| 19 EXPECT_FALSE(iter.Advance()); | |
| 20 EXPECT_FALSE(iter.IsWord()); | |
| 21 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 22 EXPECT_FALSE(iter.IsWord()); | |
| 23 } | |
| 24 | |
| 25 TEST(BreakIteratorTest, BreakWord) { | |
| 26 string16 space(UTF8ToUTF16(" ")); | |
| 27 string16 str(UTF8ToUTF16(" foo bar! \npouet boom")); | |
| 28 BreakIterator iter(str, BreakIterator::BREAK_WORD); | |
| 29 ASSERT_TRUE(iter.Init()); | |
| 30 EXPECT_TRUE(iter.Advance()); | |
| 31 EXPECT_FALSE(iter.IsWord()); | |
| 32 EXPECT_EQ(space, iter.GetString()); | |
| 33 EXPECT_TRUE(iter.Advance()); | |
| 34 EXPECT_TRUE(iter.IsWord()); | |
| 35 EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetString()); | |
| 36 EXPECT_TRUE(iter.Advance()); | |
| 37 EXPECT_FALSE(iter.IsWord()); | |
| 38 EXPECT_EQ(space, iter.GetString()); | |
| 39 EXPECT_TRUE(iter.Advance()); | |
| 40 EXPECT_TRUE(iter.IsWord()); | |
| 41 EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetString()); | |
| 42 EXPECT_TRUE(iter.Advance()); | |
| 43 EXPECT_FALSE(iter.IsWord()); | |
| 44 EXPECT_EQ(UTF8ToUTF16("!"), iter.GetString()); | |
| 45 EXPECT_TRUE(iter.Advance()); | |
| 46 EXPECT_FALSE(iter.IsWord()); | |
| 47 EXPECT_EQ(space, iter.GetString()); | |
| 48 EXPECT_TRUE(iter.Advance()); | |
| 49 EXPECT_FALSE(iter.IsWord()); | |
| 50 EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetString()); | |
| 51 EXPECT_TRUE(iter.Advance()); | |
| 52 EXPECT_TRUE(iter.IsWord()); | |
| 53 EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetString()); | |
| 54 EXPECT_TRUE(iter.Advance()); | |
| 55 EXPECT_FALSE(iter.IsWord()); | |
| 56 EXPECT_EQ(space, iter.GetString()); | |
| 57 EXPECT_TRUE(iter.Advance()); | |
| 58 EXPECT_TRUE(iter.IsWord()); | |
| 59 EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString()); | |
| 60 EXPECT_FALSE(iter.Advance()); | |
| 61 EXPECT_FALSE(iter.IsWord()); | |
| 62 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 63 EXPECT_FALSE(iter.IsWord()); | |
| 64 } | |
| 65 | |
| 66 TEST(BreakIteratorTest, BreakWide16) { | |
| 67 // Two greek words separated by space. | |
| 68 const string16 str(WideToUTF16( | |
| 69 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" | |
| 70 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2")); | |
| 71 const string16 word1(str.substr(0, 10)); | |
| 72 const string16 word2(str.substr(11, 5)); | |
| 73 BreakIterator iter(str, BreakIterator::BREAK_WORD); | |
| 74 ASSERT_TRUE(iter.Init()); | |
| 75 EXPECT_TRUE(iter.Advance()); | |
| 76 EXPECT_TRUE(iter.IsWord()); | |
| 77 EXPECT_EQ(word1, iter.GetString()); | |
| 78 EXPECT_TRUE(iter.Advance()); | |
| 79 EXPECT_FALSE(iter.IsWord()); | |
| 80 EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); | |
| 81 EXPECT_TRUE(iter.Advance()); | |
| 82 EXPECT_TRUE(iter.IsWord()); | |
| 83 EXPECT_EQ(word2, iter.GetString()); | |
| 84 EXPECT_FALSE(iter.Advance()); | |
| 85 EXPECT_FALSE(iter.IsWord()); | |
| 86 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 87 EXPECT_FALSE(iter.IsWord()); | |
| 88 } | |
| 89 | |
| 90 TEST(BreakIteratorTest, BreakWide32) { | |
| 91 // U+1D49C MATHEMATICAL SCRIPT CAPITAL A | |
| 92 const char very_wide_char[] = "\xF0\x9D\x92\x9C"; | |
| 93 const string16 str( | |
| 94 UTF8ToUTF16(base::StringPrintf("%s a", very_wide_char))); | |
| 95 const string16 very_wide_word(str.substr(0, 2)); | |
| 96 | |
| 97 BreakIterator iter(str, BreakIterator::BREAK_WORD); | |
| 98 ASSERT_TRUE(iter.Init()); | |
| 99 EXPECT_TRUE(iter.Advance()); | |
| 100 EXPECT_TRUE(iter.IsWord()); | |
| 101 EXPECT_EQ(very_wide_word, iter.GetString()); | |
| 102 EXPECT_TRUE(iter.Advance()); | |
| 103 EXPECT_FALSE(iter.IsWord()); | |
| 104 EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); | |
| 105 EXPECT_TRUE(iter.Advance()); | |
| 106 EXPECT_TRUE(iter.IsWord()); | |
| 107 EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString()); | |
| 108 EXPECT_FALSE(iter.Advance()); | |
| 109 EXPECT_FALSE(iter.IsWord()); | |
| 110 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 111 EXPECT_FALSE(iter.IsWord()); | |
| 112 } | |
| 113 | |
| 114 TEST(BreakIteratorTest, BreakSpaceEmpty) { | |
| 115 string16 empty; | |
| 116 BreakIterator iter(empty, BreakIterator::BREAK_SPACE); | |
| 117 ASSERT_TRUE(iter.Init()); | |
| 118 EXPECT_FALSE(iter.Advance()); | |
| 119 EXPECT_FALSE(iter.IsWord()); | |
| 120 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 121 EXPECT_FALSE(iter.IsWord()); | |
| 122 } | |
| 123 | |
| 124 TEST(BreakIteratorTest, BreakSpace) { | |
| 125 string16 str(UTF8ToUTF16(" foo bar! \npouet boom")); | |
| 126 BreakIterator iter(str, BreakIterator::BREAK_SPACE); | |
| 127 ASSERT_TRUE(iter.Init()); | |
| 128 EXPECT_TRUE(iter.Advance()); | |
| 129 EXPECT_FALSE(iter.IsWord()); | |
| 130 EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); | |
| 131 EXPECT_TRUE(iter.Advance()); | |
| 132 EXPECT_FALSE(iter.IsWord()); | |
| 133 EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString()); | |
| 134 EXPECT_TRUE(iter.Advance()); | |
| 135 EXPECT_FALSE(iter.IsWord()); | |
| 136 EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString()); | |
| 137 EXPECT_TRUE(iter.Advance()); | |
| 138 EXPECT_FALSE(iter.IsWord()); | |
| 139 EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString()); | |
| 140 EXPECT_TRUE(iter.Advance()); | |
| 141 EXPECT_FALSE(iter.IsWord()); | |
| 142 EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString()); | |
| 143 EXPECT_FALSE(iter.Advance()); | |
| 144 EXPECT_FALSE(iter.IsWord()); | |
| 145 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 146 EXPECT_FALSE(iter.IsWord()); | |
| 147 } | |
| 148 | |
| 149 TEST(BreakIteratorTest, BreakSpaceSP) { | |
| 150 string16 str(UTF8ToUTF16(" foo bar! \npouet boom ")); | |
| 151 BreakIterator iter(str, BreakIterator::BREAK_SPACE); | |
| 152 ASSERT_TRUE(iter.Init()); | |
| 153 EXPECT_TRUE(iter.Advance()); | |
| 154 EXPECT_FALSE(iter.IsWord()); | |
| 155 EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); | |
| 156 EXPECT_TRUE(iter.Advance()); | |
| 157 EXPECT_FALSE(iter.IsWord()); | |
| 158 EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString()); | |
| 159 EXPECT_TRUE(iter.Advance()); | |
| 160 EXPECT_FALSE(iter.IsWord()); | |
| 161 EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString()); | |
| 162 EXPECT_TRUE(iter.Advance()); | |
| 163 EXPECT_FALSE(iter.IsWord()); | |
| 164 EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString()); | |
| 165 EXPECT_TRUE(iter.Advance()); | |
| 166 EXPECT_FALSE(iter.IsWord()); | |
| 167 EXPECT_EQ(UTF8ToUTF16("boom "), iter.GetString()); | |
| 168 EXPECT_FALSE(iter.Advance()); | |
| 169 EXPECT_FALSE(iter.IsWord()); | |
| 170 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 171 EXPECT_FALSE(iter.IsWord()); | |
| 172 } | |
| 173 | |
| 174 TEST(BreakIteratorTest, BreakSpacekWide16) { | |
| 175 // Two Greek words. | |
| 176 const string16 str(WideToUTF16( | |
| 177 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" | |
| 178 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2")); | |
| 179 const string16 word1(str.substr(0, 11)); | |
| 180 const string16 word2(str.substr(11, 5)); | |
| 181 BreakIterator iter(str, BreakIterator::BREAK_SPACE); | |
| 182 ASSERT_TRUE(iter.Init()); | |
| 183 EXPECT_TRUE(iter.Advance()); | |
| 184 EXPECT_FALSE(iter.IsWord()); | |
| 185 EXPECT_EQ(word1, iter.GetString()); | |
| 186 EXPECT_TRUE(iter.Advance()); | |
| 187 EXPECT_FALSE(iter.IsWord()); | |
| 188 EXPECT_EQ(word2, iter.GetString()); | |
| 189 EXPECT_FALSE(iter.Advance()); | |
| 190 EXPECT_FALSE(iter.IsWord()); | |
| 191 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 192 EXPECT_FALSE(iter.IsWord()); | |
| 193 } | |
| 194 | |
| 195 TEST(BreakIteratorTest, BreakSpaceWide32) { | |
| 196 // U+1D49C MATHEMATICAL SCRIPT CAPITAL A | |
| 197 const char very_wide_char[] = "\xF0\x9D\x92\x9C"; | |
| 198 const string16 str( | |
| 199 UTF8ToUTF16(base::StringPrintf("%s a", very_wide_char))); | |
| 200 const string16 very_wide_word(str.substr(0, 3)); | |
| 201 | |
| 202 BreakIterator iter(str, BreakIterator::BREAK_SPACE); | |
| 203 ASSERT_TRUE(iter.Init()); | |
| 204 EXPECT_TRUE(iter.Advance()); | |
| 205 EXPECT_FALSE(iter.IsWord()); | |
| 206 EXPECT_EQ(very_wide_word, iter.GetString()); | |
| 207 EXPECT_TRUE(iter.Advance()); | |
| 208 EXPECT_FALSE(iter.IsWord()); | |
| 209 EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString()); | |
| 210 EXPECT_FALSE(iter.Advance()); | |
| 211 EXPECT_FALSE(iter.IsWord()); | |
| 212 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 213 EXPECT_FALSE(iter.IsWord()); | |
| 214 } | |
| 215 | |
| 216 TEST(BreakIteratorTest, BreakLineEmpty) { | |
| 217 string16 empty; | |
| 218 BreakIterator iter(empty, BreakIterator::BREAK_NEWLINE); | |
| 219 ASSERT_TRUE(iter.Init()); | |
| 220 EXPECT_FALSE(iter.Advance()); | |
| 221 EXPECT_FALSE(iter.IsWord()); | |
| 222 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 223 EXPECT_FALSE(iter.IsWord()); | |
| 224 } | |
| 225 | |
| 226 TEST(BreakIteratorTest, BreakLine) { | |
| 227 string16 nl(UTF8ToUTF16("\n")); | |
| 228 string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom")); | |
| 229 BreakIterator iter(str, BreakIterator::BREAK_NEWLINE); | |
| 230 ASSERT_TRUE(iter.Init()); | |
| 231 EXPECT_TRUE(iter.Advance()); | |
| 232 EXPECT_FALSE(iter.IsWord()); | |
| 233 EXPECT_EQ(nl, iter.GetString()); | |
| 234 EXPECT_TRUE(iter.Advance()); | |
| 235 EXPECT_FALSE(iter.IsWord()); | |
| 236 EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString()); | |
| 237 EXPECT_TRUE(iter.Advance()); | |
| 238 EXPECT_FALSE(iter.IsWord()); | |
| 239 EXPECT_EQ(nl, iter.GetString()); | |
| 240 EXPECT_TRUE(iter.Advance()); | |
| 241 EXPECT_FALSE(iter.IsWord()); | |
| 242 EXPECT_EQ(UTF8ToUTF16("pouet boom"), iter.GetString()); | |
| 243 EXPECT_FALSE(iter.Advance()); | |
| 244 EXPECT_FALSE(iter.IsWord()); | |
| 245 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 246 EXPECT_FALSE(iter.IsWord()); | |
| 247 } | |
| 248 | |
| 249 TEST(BreakIteratorTest, BreakLineNL) { | |
| 250 string16 nl(UTF8ToUTF16("\n")); | |
| 251 string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom\n")); | |
| 252 BreakIterator iter(str, BreakIterator::BREAK_NEWLINE); | |
| 253 ASSERT_TRUE(iter.Init()); | |
| 254 EXPECT_TRUE(iter.Advance()); | |
| 255 EXPECT_FALSE(iter.IsWord()); | |
| 256 EXPECT_EQ(nl, iter.GetString()); | |
| 257 EXPECT_TRUE(iter.Advance()); | |
| 258 EXPECT_FALSE(iter.IsWord()); | |
| 259 EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString()); | |
| 260 EXPECT_TRUE(iter.Advance()); | |
| 261 EXPECT_FALSE(iter.IsWord()); | |
| 262 EXPECT_EQ(nl, iter.GetString()); | |
| 263 EXPECT_TRUE(iter.Advance()); | |
| 264 EXPECT_FALSE(iter.IsWord()); | |
| 265 EXPECT_EQ(UTF8ToUTF16("pouet boom\n"), iter.GetString()); | |
| 266 EXPECT_FALSE(iter.Advance()); | |
| 267 EXPECT_FALSE(iter.IsWord()); | |
| 268 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 269 EXPECT_FALSE(iter.IsWord()); | |
| 270 } | |
| 271 | |
| 272 TEST(BreakIteratorTest, BreakLineWide16) { | |
| 273 // Two Greek words separated by newline. | |
| 274 const string16 str(WideToUTF16( | |
| 275 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" | |
| 276 L"\x03bf\x03c2\x000a\x0399\x03c3\x03c4\x03cc\x03c2")); | |
| 277 const string16 line1(str.substr(0, 11)); | |
| 278 const string16 line2(str.substr(11, 5)); | |
| 279 BreakIterator iter(str, BreakIterator::BREAK_NEWLINE); | |
| 280 ASSERT_TRUE(iter.Init()); | |
| 281 EXPECT_TRUE(iter.Advance()); | |
| 282 EXPECT_FALSE(iter.IsWord()); | |
| 283 EXPECT_EQ(line1, iter.GetString()); | |
| 284 EXPECT_TRUE(iter.Advance()); | |
| 285 EXPECT_FALSE(iter.IsWord()); | |
| 286 EXPECT_EQ(line2, iter.GetString()); | |
| 287 EXPECT_FALSE(iter.Advance()); | |
| 288 EXPECT_FALSE(iter.IsWord()); | |
| 289 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 290 EXPECT_FALSE(iter.IsWord()); | |
| 291 } | |
| 292 | |
| 293 TEST(BreakIteratorTest, BreakLineWide32) { | |
| 294 // U+1D49C MATHEMATICAL SCRIPT CAPITAL A | |
| 295 const char very_wide_char[] = "\xF0\x9D\x92\x9C"; | |
| 296 const string16 str( | |
| 297 UTF8ToUTF16(base::StringPrintf("%s\na", very_wide_char))); | |
| 298 const string16 very_wide_line(str.substr(0, 3)); | |
| 299 BreakIterator iter(str, BreakIterator::BREAK_NEWLINE); | |
| 300 ASSERT_TRUE(iter.Init()); | |
| 301 EXPECT_TRUE(iter.Advance()); | |
| 302 EXPECT_FALSE(iter.IsWord()); | |
| 303 EXPECT_EQ(very_wide_line, iter.GetString()); | |
| 304 EXPECT_TRUE(iter.Advance()); | |
| 305 EXPECT_FALSE(iter.IsWord()); | |
| 306 EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString()); | |
| 307 EXPECT_FALSE(iter.Advance()); | |
| 308 EXPECT_FALSE(iter.IsWord()); | |
| 309 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. | |
| 310 EXPECT_FALSE(iter.IsWord()); | |
| 311 } | |
| 312 | |
| 313 TEST(BreakIteratorTest, BreakCharacter) { | |
| 314 static const wchar_t* kCharacters[] = { | |
| 315 // An English word consisting of four ASCII characters. | |
| 316 L"w", L"o", L"r", L"d", L" ", | |
| 317 // A Hindi word (which means "Hindi") consisting of three Devanagari | |
| 318 // characters. | |
| 319 L"\x0939\x093F", L"\x0928\x094D", L"\x0926\x0940", L" ", | |
| 320 // A Thai word (which means "feel") consisting of three Thai characters. | |
| 321 L"\x0E23\x0E39\x0E49", L"\x0E2A\x0E36", L"\x0E01", L" ", | |
| 322 }; | |
| 323 std::vector<string16> characters; | |
| 324 string16 text; | |
| 325 for (size_t i = 0; i < arraysize(kCharacters); ++i) { | |
| 326 characters.push_back(WideToUTF16(kCharacters[i])); | |
| 327 text.append(characters.back()); | |
| 328 } | |
| 329 BreakIterator iter(text, BreakIterator::BREAK_CHARACTER); | |
| 330 ASSERT_TRUE(iter.Init()); | |
| 331 for (size_t i = 0; i < arraysize(kCharacters); ++i) { | |
| 332 EXPECT_TRUE(iter.Advance()); | |
| 333 EXPECT_EQ(characters[i], iter.GetString()); | |
| 334 } | |
| 335 } | |
| 336 | |
| 337 // Test for https://code.google.com/p/chromium/issues/detail?id=411213 | |
| 338 // We should be able to get valid substrings with GetString() function | |
| 339 // after setting new content by calling SetText(). | |
| 340 TEST(BreakIteratorTest, GetStringAfterSetText) { | |
| 341 const string16 initial_string(ASCIIToUTF16("str")); | |
| 342 BreakIterator iter(initial_string, BreakIterator::BREAK_WORD); | |
| 343 ASSERT_TRUE(iter.Init()); | |
| 344 | |
| 345 const string16 long_string(ASCIIToUTF16("another,string")); | |
| 346 EXPECT_TRUE(iter.SetText(long_string.c_str(), long_string.size())); | |
| 347 EXPECT_TRUE(iter.Advance()); | |
| 348 EXPECT_TRUE(iter.Advance()); // Advance to ',' in |long_string| | |
| 349 | |
| 350 // Check that the current position is out of bounds of the |initial_string|. | |
| 351 EXPECT_LT(initial_string.size(), iter.pos()); | |
| 352 | |
| 353 // Check that we can get a valid substring of |long_string|. | |
| 354 EXPECT_EQ(ASCIIToUTF16(","), iter.GetString()); | |
| 355 } | |
| 356 | |
| 357 TEST(BreakIteratorTest, GetStringPiece) { | |
| 358 const string16 initial_string(ASCIIToUTF16("some string")); | |
| 359 BreakIterator iter(initial_string, BreakIterator::BREAK_WORD); | |
| 360 ASSERT_TRUE(iter.Init()); | |
| 361 | |
| 362 EXPECT_TRUE(iter.Advance()); | |
| 363 EXPECT_EQ(iter.GetString(), iter.GetStringPiece().as_string()); | |
| 364 EXPECT_EQ(StringPiece16(ASCIIToUTF16("some")), iter.GetStringPiece()); | |
| 365 | |
| 366 EXPECT_TRUE(iter.Advance()); | |
| 367 EXPECT_TRUE(iter.Advance()); | |
| 368 EXPECT_EQ(iter.GetString(), iter.GetStringPiece().as_string()); | |
| 369 EXPECT_EQ(StringPiece16(ASCIIToUTF16("string")), iter.GetStringPiece()); | |
| 370 } | |
| 371 | |
| 372 } // namespace i18n | |
| 373 } // namespace base | |
| OLD | NEW |