Index: chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc |
diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc |
index 08809ded8e7905613cfbdafbd361a970d7d5a333..358d6ea225a7ec078641e273ad9d731bb22363bf 100644 |
--- a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc |
+++ b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc |
@@ -6,12 +6,15 @@ |
#include <vector> |
#include "base/format_macros.h" |
+#include "base/i18n/break_iterator.h" |
#include "base/strings/string_split.h" |
#include "base/strings/stringprintf.h" |
#include "base/strings/utf_string_conversions.h" |
#include "chrome/renderer/spellchecker/spellcheck_worditerator.h" |
#include "testing/gtest/include/gtest/gtest.h" |
+using base::i18n::BreakIterator; |
+ |
namespace { |
struct TestCase { |
@@ -20,6 +23,12 @@ struct TestCase { |
const wchar_t* expected_words; |
}; |
+// Returns the rule set that a fresh SpellcheckCharAttribute produces after |
+// "language" has been set as its default language. The tests in this file |
+// hand the result to BreakIterator. What the "true" argument to GetRuleSet() |
+// selects is not visible here -- TODO confirm against its declaration. |
+base::string16 GetRulesForLanguage(const std::string& language) { |
+ SpellcheckCharAttribute attribute; |
+ attribute.SetDefaultLanguage(language); |
+ return attribute.GetRuleSet(true); |
+} |
+ |
} // namespace |
// Tests whether or not our SpellcheckWordIterator can extract only words used |
@@ -295,3 +304,142 @@ TEST(SpellcheckWordIteratorTest, Initialization) { |
EXPECT_FALSE(iterator.Initialize(&attributes, true)); |
} |
} |
+ |
+// Make sure that when not in RULE_BASED or BREAK_WORD mode we're getting |
+// IS_NOT_WORD_BREAK. |
+TEST(SpellcheckWordIteratorTest, BreakLine) { |
please use gerrit instead
2015/08/07 20:53:10
This test should be in base/.
Also add a test for
Julius
2015/08/10 16:06:37
Done.
|
+ // The string "foo ទេ Can Ми..." which contains English, Khmer, and Russian |
+ // characters, in that order. |
+ base::string16 text( |
+ base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438...")); |
please use gerrit instead
2015/08/07 20:53:10
Put a newline in there, so that you get one re
Julius
2015/08/10 16:06:37
Well, it's still going to be IS_NOT_WORD_BREAK if
|
+ BreakIterator iter(text, BreakIterator::BREAK_LINE); |
+ ASSERT_TRUE(iter.Init()); |
+ |
+ EXPECT_TRUE(iter.Advance()); |
+ // Finds "foo". |
please use gerrit instead
2015/08/07 20:53:10
Also add this throughout:
EXPECT_EQ(base::WideToUTF
Julius
2015/08/10 16:06:37
Done.
|
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK); |
please use gerrit instead
2015/08/07 20:53:10
Can you think of a better name for BreakIterator::
Julius
2015/08/10 16:06:37
IS_LINE_OR_CHAR_BREAK seems good.
|
+ EXPECT_TRUE(iter.Advance()); |
+ // Finds the space, the Khmer characters, and the next space. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK); |
+ EXPECT_TRUE(iter.Advance()); |
+ // Finds "Can". |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK); |
+ EXPECT_TRUE(iter.Advance()); |
+ // Finds the Russian characters and periods. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK); |
+ EXPECT_FALSE(iter.Advance()); |
+} |
+ |
+// With the en-US rule set, checks that only the English words are reported as |
+// IS_WORD_BREAK; Khmer, Russian, spaces, and periods are IS_SKIPPABLE_WORD. |
+TEST(SpellcheckWordIteratorTest, FindSkippableWordsEnglish) { |
+ // The string "foo ទេ Can Ми..." which contains English, Khmer, and Russian |
+ // characters, in that order. |
+ base::string16 text( |
+ base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438...")); |
+ BreakIterator iter(text, GetRulesForLanguage("en-US")); |
+ ASSERT_TRUE(iter.Init()); |
+ |
+ EXPECT_TRUE(iter.Advance()); |
+ // "foo" is reported as a word. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK); |
+ EXPECT_TRUE(iter.Advance()); |
+ // Two skippable segments: the space, then the Khmer characters. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ // The next space is skippable; "Can" is reported as a word. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK); |
+ EXPECT_TRUE(iter.Advance()); |
+ // The next space and each of the two Russian characters are skippable. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ // Each of the three trailing periods is skippable on its own. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_FALSE(iter.Advance()); |
+} |
+ |
+// With the ru-RU rule set, checks that only the Russian word is reported as |
+// IS_WORD_BREAK; punctuation, English, Khmer, and spaces are skippable. |
+TEST(SpellcheckWordIteratorTest, FindSkippableWordsRussian) { |
+ // The string ".;Ми Can ទេ " which contains Russian, English, and Khmer |
+ // characters, in that order. NOTE(review): confirm trailing space count. |
+ base::string16 text(base::WideToUTF16(L".;\x041C\x0438 Can \x1791\x17c1 ")); |
+ BreakIterator iter(text, GetRulesForLanguage("ru-RU")); |
+ ASSERT_TRUE(iter.Init()); |
+ |
+ EXPECT_TRUE(iter.Advance()); |
+ // The period and the semicolon are each skippable. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ // The Russian characters are reported together as one word. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK); |
+ EXPECT_TRUE(iter.Advance()); |
+ // The space and "Can" yield four skippable segments. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ // Four more: the next space, the Khmer characters, the last two spaces. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_FALSE(iter.Advance()); |
+} |
+ |
+// With the km rule set, checks that only the Khmer text is reported as |
+// IS_WORD_BREAK; Russian, English, spaces, and punctuation are skippable. |
+TEST(SpellcheckWordIteratorTest, FindSkippableWordsKhmer) { |
+ // The string "Ми ទេzoo. ," which contains Russian, Khmer, and English |
+ // characters, in that order. |
+ base::string16 text(base::WideToUTF16(L"\x041C\x0438 \x1791\x17c1zoo. ,")); |
+ BreakIterator iter(text, GetRulesForLanguage("km")); |
+ ASSERT_TRUE(iter.Init()); |
+ |
+ EXPECT_TRUE(iter.Advance()); |
+ // Each of the two Russian characters and the space are skippable. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ // The Khmer characters are reported together as one word. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK); |
+ EXPECT_TRUE(iter.Advance()); |
+ // Each letter of "zoo" is skippable on its own. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ // The period, the space, and the comma are each skippable. |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_TRUE(iter.Advance()); |
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); |
+ EXPECT_FALSE(iter.Advance()); |
+} |