| Index: base/i18n/break_iterator_unittest.cc
|
| diff --git a/base/i18n/break_iterator_unittest.cc b/base/i18n/break_iterator_unittest.cc
|
| index 220a996b961c70e8a164f6fe6cc0f6c7fd7185a3..c53509148d9ccfbd7bd2fc8c66715f31e0e8409c 100644
|
| --- a/base/i18n/break_iterator_unittest.cc
|
| +++ b/base/i18n/break_iterator_unittest.cc
|
| @@ -369,5 +369,90 @@ TEST(BreakIteratorTest, GetStringPiece) {
|
| EXPECT_EQ(StringPiece16(ASCIIToUTF16("string")), iter.GetStringPiece());
|
| }
|
|
|
| +// Verifies that outside RULE_BASED and BREAK_WORD modes (BREAK_LINE here),
|
| +// GetWordBreakStatus() reports IS_LINE_OR_CHAR_BREAK after each Advance().
|
| +TEST(BreakIteratorTest, GetWordBreakStatusBreakLine) {
|
| + // Input: the English word "foo", two Khmer characters, a space and a
|
| + // newline, the English word "Can", and finally two Russian (Cyrillic)
|
| + // characters followed by three periods.
|
| + base::string16 text(
|
| + base::WideToUTF16(L"foo \x1791\x17C1 \nCan \x041C\x0438..."));
|
| + BreakIterator iter(text, BreakIterator::BREAK_LINE);
|
| + ASSERT_TRUE(iter.Init());
|
| +
|
| + EXPECT_TRUE(iter.Advance());
|
| + // First segment: "foo" plus its trailing space.
|
| + EXPECT_EQ(base::UTF8ToUTF16("foo "), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Second segment: the Khmer characters, the space, and the newline.
|
| + EXPECT_EQ(base::WideToUTF16(L"\x1791\x17C1 \n"), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Third segment: "Can" plus its trailing space.
|
| + EXPECT_EQ(base::UTF8ToUTF16("Can "), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Final segment: the Russian characters and the three periods.
|
| + EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438..."), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK);
|
| + EXPECT_FALSE(iter.Advance());
|
| +}
|
| +
|
| +// Verifies that in BREAK_WORD mode GetWordBreakStatus() reports IS_WORD_BREAK
|
| +// for segments made of word characters ("foo", the Khmer pair, "Can", the
|
| +// Russian pair) and IS_SKIPPABLE_WORD for segments made of whitespace or
|
| +// punctuation (each space, the newline, and each period).
|
| +TEST(BreakIteratorTest, GetWordBreakStatusBreakWord) {
|
| + // Input: the English word "foo", two Khmer characters, a space and a
|
| + // newline, the English word "Can", and finally two Russian (Cyrillic)
|
| + // characters followed by three periods.
|
| + base::string16 text(
|
| + base::WideToUTF16(L"foo \x1791\x17C1 \nCan \x041C\x0438..."));
|
| + BreakIterator iter(text, BreakIterator::BREAK_WORD);
|
| + ASSERT_TRUE(iter.Init());
|
| +
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Segment 1: the word "foo".
|
| + EXPECT_EQ(base::UTF8ToUTF16("foo"), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Segments 2-3: the space (skippable), then the Khmer characters (a word).
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::WideToUTF16(L"\x1791\x17C1"), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Segments 4-5: the space, then the newline; both are skippable.
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16("\n"), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Segment 6: the word "Can".
|
| + EXPECT_EQ(base::UTF8ToUTF16("Can"), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Segments 7-8: the space (skippable), then the Russian characters (a word).
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438"), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Segments 9-11: each trailing period is its own skippable segment.
|
| + EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
|
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_FALSE(iter.Advance());
|
| +}
|
| +
|
| } // namespace i18n
|
| } // namespace base
|
|
|