Chromium Code Reviews| Index: base/i18n/break_iterator_unittest.cc |
| diff --git a/base/i18n/break_iterator_unittest.cc b/base/i18n/break_iterator_unittest.cc |
| index 220a996b961c70e8a164f6fe6cc0f6c7fd7185a3..4e65bb901bb7bf77a99fcf627d985b12d43309b3 100644 |
| --- a/base/i18n/break_iterator_unittest.cc |
| +++ b/base/i18n/break_iterator_unittest.cc |
| @@ -369,5 +369,88 @@ TEST(BreakIteratorTest, GetStringPiece) { |
| EXPECT_EQ(StringPiece16(ASCIIToUTF16("string")), iter.GetStringPiece()); |
| } |
| +// Verifies that in BREAK_LINE mode (i.e. neither RULE_BASED nor BREAK_WORD) |
| +// GetWordBreakStatus() reports IS_LINE_OR_CHAR_BREAK for every segment. |
| +TEST(BreakIteratorTest, GetWordBreakStatusBreakLine) { |
| + // The string "foo \x1791\x17c1 \nCan \x041C\x0438...", which mixes English, |

jungshik at Google
2015/08/11 21:43:50
Due to an issue with Visual Studio, we cannot use
Julius
2015/08/12 01:22:21
I got rid of the non-ASCII characters in the comme
|
| + // Khmer, and Russian characters, in that order. |
| + base::string16 text( |
| + base::WideToUTF16(L"foo \x1791\x17c1 \nCan \x041C\x0438...")); |
| + BreakIterator iter(text, BreakIterator::BREAK_LINE); |
| + ASSERT_TRUE(iter.Init()); |
| + |
| + EXPECT_TRUE(iter.Advance()); |
| + // Finds "foo" and the trailing space as a single segment. |
| + EXPECT_EQ(base::UTF8ToUTF16("foo "), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK); |
| + EXPECT_TRUE(iter.Advance()); |
| + // Finds the Khmer characters, the next space, and the newline. |
| + EXPECT_EQ(base::WideToUTF16(L"\x1791\x17c1 \n"), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK); |
| + EXPECT_TRUE(iter.Advance()); |
| + // Finds "Can" and the trailing space as a single segment. |
| + EXPECT_EQ(base::UTF8ToUTF16("Can "), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK); |
| + EXPECT_TRUE(iter.Advance()); |
| + // Finds the Russian characters together with the three periods. |
| + EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438..."), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK); |
| + EXPECT_FALSE(iter.Advance()); |
| +} |
| + |
| +// Verifies the statuses reported in BREAK_WORD mode: GetWordBreakStatus() |
| +// should return IS_WORD_BREAK after a run of non-punctuation (word) characters |
| +// and IS_SKIPPABLE_WORD after each space, newline, or punctuation character in |
| +// the test string. |
| +TEST(BreakIteratorTest, GetWordBreakStatusBreakWord) { |
| + // The string "foo \x1791\x17c1 \nCan \x041C\x0438...", which mixes English, |
| + // Khmer, and Russian characters, in that order. |
| + base::string16 text( |
| + base::WideToUTF16(L"foo \x1791\x17c1 \nCan \x041C\x0438...")); |
| + BreakIterator iter(text, BreakIterator::BREAK_WORD); |
| + ASSERT_TRUE(iter.Init()); |
| + |
| + EXPECT_TRUE(iter.Advance()); |
| + // Finds "foo". |
| + EXPECT_EQ(base::UTF8ToUTF16("foo"), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK); |
| + EXPECT_TRUE(iter.Advance()); |
| + // Finds the space, then the Khmer characters, as two separate segments. |
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
| + EXPECT_TRUE(iter.Advance()); |
| + EXPECT_EQ(base::WideToUTF16(L"\x1791\x17c1"), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK); |
| + EXPECT_TRUE(iter.Advance()); |
| + // Finds the space, then the newline, as two separate skippable segments. |
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
| + EXPECT_TRUE(iter.Advance()); |
| + EXPECT_EQ(base::UTF8ToUTF16("\n"), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
| + EXPECT_TRUE(iter.Advance()); |
| + // Finds "Can". |
| + EXPECT_EQ(base::UTF8ToUTF16("Can"), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK); |
| + EXPECT_TRUE(iter.Advance()); |
| + // Finds the space, then the Russian characters, as two separate segments. |
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
| + EXPECT_TRUE(iter.Advance()); |
| + EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438"), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK); |
| + EXPECT_TRUE(iter.Advance()); |
| + // Finds the three trailing periods, each as its own skippable segment. |
| + EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
| + EXPECT_TRUE(iter.Advance()); |
| + EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
| + EXPECT_TRUE(iter.Advance()); |
| + EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString()); |
| + EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
| + EXPECT_FALSE(iter.Advance()); |
| +} |
| + |
| } // namespace i18n |
| } // namespace base |