Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3071)

Unified Diff: base/i18n/break_iterator_unittest.cc

Issue 1272683002: Creates BreakIterator::GetWordBreakStatus. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Updated Khmer tests and ASCII-fied comments. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: base/i18n/break_iterator_unittest.cc
diff --git a/base/i18n/break_iterator_unittest.cc b/base/i18n/break_iterator_unittest.cc
index 220a996b961c70e8a164f6fe6cc0f6c7fd7185a3..c53509148d9ccfbd7bd2fc8c66715f31e0e8409c 100644
--- a/base/i18n/break_iterator_unittest.cc
+++ b/base/i18n/break_iterator_unittest.cc
@@ -369,5 +369,90 @@ TEST(BreakIteratorTest, GetStringPiece) {
EXPECT_EQ(StringPiece16(ASCIIToUTF16("string")), iter.GetStringPiece());
}
+// Make sure that when not in RULE_BASED or BREAK_WORD mode we're getting
+// IS_LINE_OR_CHAR_BREAK.
+TEST(BreakIteratorTest, GetWordBreakStatusBreakLine) {
+ // A string containing the English word "foo", followed by two Khmer
+ // characters, the English word "Can", and then two Russian characters and
+ // punctuation.
+ base::string16 text(
+ base::WideToUTF16(L"foo \x1791\x17C1 \nCan \x041C\x0438..."));
+ BreakIterator iter(text, BreakIterator::BREAK_LINE);
+ ASSERT_TRUE(iter.Init());
+
+ EXPECT_TRUE(iter.Advance());
+ // Finds "foo" and the space.
+ EXPECT_EQ(base::UTF8ToUTF16("foo "), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the Khmer characters, the next space, and the newline.
+ EXPECT_EQ(base::WideToUTF16(L"\x1791\x17C1 \n"), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds "Can" and the space.
+ EXPECT_EQ(base::UTF8ToUTF16("Can "), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the Russian characters and periods.
+ EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438..."), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_LINE_OR_CHAR_BREAK);
+ EXPECT_FALSE(iter.Advance());
+}
+
+// Make sure that in BREAK_WORD mode we're getting IS_WORD_BREAK and
+// IS_SKIPPABLE_WORD when we should be. IS_WORD_BREAK should be returned when we
+// finish going over non-punctuation characters while IS_SKIPPABLE_WORD should
+// be returned on punctuation and spaces.
+TEST(BreakIteratorTest, GetWordBreakStatusBreakWord) {
+ // A string containing the English word "foo", followed by two Khmer
+ // characters, the English word "Can", and then two Russian characters and
+ // punctuation.
+ base::string16 text(
+ base::WideToUTF16(L"foo \x1791\x17C1 \nCan \x041C\x0438..."));
+ BreakIterator iter(text, BreakIterator::BREAK_WORD);
+ ASSERT_TRUE(iter.Init());
+
+ EXPECT_TRUE(iter.Advance());
+ // Finds "foo".
+ EXPECT_EQ(base::UTF8ToUTF16("foo"), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the space, and the Khmer characters.
+ EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_EQ(base::WideToUTF16(L"\x1791\x17C1"), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the space and the newline.
+ EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_EQ(base::UTF8ToUTF16("\n"), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ // Finds "Can".
+ EXPECT_EQ(base::UTF8ToUTF16("Can"), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the space and the Russian characters.
+ EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438"), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the trailing periods.
+ EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
+ EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_FALSE(iter.Advance());
+}
+
} // namespace i18n
} // namespace base

Powered by Google App Engine
This is Rietveld 408576698