Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(97)

Unified Diff: base/i18n/break_iterator_unittest.cc

Issue 1272683002: Creates BreakIterator::GetWordBreakStatus. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebase and address comments. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: base/i18n/break_iterator_unittest.cc
diff --git a/base/i18n/break_iterator_unittest.cc b/base/i18n/break_iterator_unittest.cc
index 220a996b961c70e8a164f6fe6cc0f6c7fd7185a3..4cfc66efab746b23ac0eb82482fe70c8543b95cb 100644
--- a/base/i18n/break_iterator_unittest.cc
+++ b/base/i18n/break_iterator_unittest.cc
@@ -369,5 +369,88 @@ TEST(BreakIteratorTest, GetStringPiece) {
EXPECT_EQ(StringPiece16(ASCIIToUTF16("string")), iter.GetStringPiece());
}
+// Make sure that outside of RULE_BASED and BREAK_WORD modes (BREAK_LINE here)
+// every break is reported as IS_LINE_OR_CHAR_BREAK.
+TEST(BreakIteratorTest, IsWordBreakBreakLine) {
+ // The string "foo ទេ \nCan Ми..." which contains English, Khmer, and Russian
+ // characters, in that order.
+ base::string16 text(
+ base::WideToUTF16(L"foo \x1791\x17c1 \nCan \x041C\x0438..."));
+ BreakIterator iter(text, BreakIterator::BREAK_LINE);
+ ASSERT_TRUE(iter.Init());
+
+ EXPECT_TRUE(iter.Advance());
+ // Finds "foo" and the space.
+ EXPECT_EQ(base::UTF8ToUTF16("foo "), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_LINE_OR_CHAR_BREAK, iter.IsWordBreak());
please use gerrit instead 2015/08/10 17:24:42 Here and below, use EXPECT_EQ(item1, item2) instea
Julius 2015/08/10 18:56:19 Done.
+ EXPECT_TRUE(iter.Advance());
+ // Finds the Khmer characters, the next space, and the newline.
+ EXPECT_EQ(base::WideToUTF16(L"\x1791\x17c1 \n"), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_LINE_OR_CHAR_BREAK, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ // Finds "Can" and the space.
+ EXPECT_EQ(base::UTF8ToUTF16("Can "), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_LINE_OR_CHAR_BREAK, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ // Finds the Russian characters and periods.
+ EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438..."), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_LINE_OR_CHAR_BREAK, iter.IsWordBreak());
+ EXPECT_FALSE(iter.Advance());
+}
+
+// Make sure that in BREAK_WORD mode we're getting IS_WORD_BREAK and
+// IS_SKIPPABLE_WORD when we should be. IS_WORD_BREAK should be returned when we
+// finish going over non-punctuation characters while IS_SKIPPABLE_WORD should
+// be returned on punctuation and whitespace (spaces and the newline).
+TEST(BreakIteratorTest, IsWordBreakBreakWord) {
+ // The string "foo ទេ \nCan Ми..." which contains English, Khmer, and Russian
+ // characters, in that order.
+ base::string16 text(
+ base::WideToUTF16(L"foo \x1791\x17c1 \nCan \x041C\x0438..."));
+ BreakIterator iter(text, BreakIterator::BREAK_WORD);
+ ASSERT_TRUE(iter.Init());
+
+ EXPECT_TRUE(iter.Advance());
+ // Finds "foo".
+ EXPECT_EQ(base::UTF8ToUTF16("foo"), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_WORD_BREAK, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ // Finds the space first, then the Khmer characters as a separate word.
+ EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_SKIPPABLE_WORD, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_EQ(base::WideToUTF16(L"\x1791\x17c1"), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_WORD_BREAK, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ // Finds the space first, then the newline as its own skippable piece.
+ EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_SKIPPABLE_WORD, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_EQ(base::UTF8ToUTF16("\n"), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_SKIPPABLE_WORD, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ // Finds "Can".
+ EXPECT_EQ(base::UTF8ToUTF16("Can"), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_WORD_BREAK, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ // Finds the space first, then the Russian characters as a separate word.
+ EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_SKIPPABLE_WORD, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438"), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_WORD_BREAK, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ // Finds the three trailing periods, one per Advance() call.
+ EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_SKIPPABLE_WORD, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_SKIPPABLE_WORD, iter.IsWordBreak());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
+ EXPECT_EQ(BreakIterator::IS_SKIPPABLE_WORD, iter.IsWordBreak());
+ EXPECT_FALSE(iter.Advance());
+}
+
} // namespace i18n
} // namespace base

Powered by Google App Engine
This is Rietveld 408576698