| Index: chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
|
| diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
|
| index 08809ded8e7905613cfbdafbd361a970d7d5a333..a9812a9d95efde60f2f4e677573290b93e26b434 100644
|
| --- a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
|
| +++ b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
|
| @@ -6,12 +6,15 @@
|
| #include <vector>
|
|
|
| #include "base/format_macros.h"
|
| +#include "base/i18n/break_iterator.h"
|
| #include "base/strings/string_split.h"
|
| #include "base/strings/stringprintf.h"
|
| #include "base/strings/utf_string_conversions.h"
|
| #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
|
| #include "testing/gtest/include/gtest/gtest.h"
|
|
|
| +using base::i18n::BreakIterator;
|
| +
|
| namespace {
|
|
|
| struct TestCase {
|
| @@ -20,6 +23,12 @@ struct TestCase {
|
| const wchar_t* expected_words;
|
| };
|
|
|
| +base::string16 GetRulesForLanguage(const std::string& language) {  // Test helper: returns the rule set that a fresh SpellcheckCharAttribute produces for |language|.
|
| + SpellcheckCharAttribute attribute;
|
| + attribute.SetDefaultLanguage(language);  // The tests in this file pass "en-US", "ru-RU", and "km".
|
| + return attribute.GetRuleSet(true);  // NOTE(review): the meaning of the `true` argument is not visible here -- confirm against SpellcheckCharAttribute::GetRuleSet.
|
| +}
|
| +
|
| } // namespace
|
|
|
| // Tests whether or not our SpellcheckWordIterator can extract only words used
|
| @@ -295,3 +304,149 @@ TEST(SpellcheckWordIteratorTest, Initialization) {
|
| EXPECT_FALSE(iterator.Initialize(&attributes, true));
|
| }
|
| }
|
| +
|
| +// Runs a BreakIterator built from the "en-US" rule set over mixed-script text:
|
| +// the English words are IS_WORD_BREAK; every other chunk is IS_SKIPPABLE_WORD.
|
| +TEST(SpellcheckWordIteratorTest, FindSkippableWordsEnglish) {
|
| + // The string "foo ទេ Can Ми..." which mixes English ("foo", "Can"), Khmer,
|
| + // and Russian characters, and ends with three periods.
|
| + base::string16 text(
|
| + base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438..."));
|
| + BreakIterator iter(text, GetRulesForLanguage("en-US"));
|
| + ASSERT_TRUE(iter.Init());
|
| +
|
| + EXPECT_TRUE(iter.Advance());
|
| + // First chunk: the English word "foo", reported as a word.
|
| + EXPECT_EQ(base::UTF8ToUTF16("foo"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Then the space, and the two Khmer characters as one chunk; both skippable.
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::WideToUTF16(L"\x1791\x17c1"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Then the next space (skippable) and the English word "Can" (a word).
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16("Can"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Then the next space and each Russian character on its own; all skippable.
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::WideToUTF16(L"\x041C"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::WideToUTF16(L"\x0438"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Finally each of the three periods on its own; then Advance() returns false.
|
| + EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_FALSE(iter.Advance());
|
| +}
|
| +
|
| +// Runs a BreakIterator built from the "ru-RU" rule set over mixed-script text:
|
| +// only the Russian word is IS_WORD_BREAK; every other chunk is IS_SKIPPABLE_WORD.
|
| +TEST(SpellcheckWordIteratorTest, FindSkippableWordsRussian) {
|
| + // The string ".;Ми Can ទេ  " which contains Russian, English, and Khmer
|
| + // characters, in that order. It ends with TWO spaces; both are asserted below.
|
| + base::string16 text(base::WideToUTF16(L".;\x041C\x0438 Can \x1791\x17c1  "));
|
| + BreakIterator iter(text, GetRulesForLanguage("ru-RU"));
|
| + ASSERT_TRUE(iter.Init());
|
| +
|
| + EXPECT_TRUE(iter.Advance());
|
| + // First the period and the semicolon, each on its own; both skippable.
|
| + EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16(";"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Then both Russian characters together, reported as a word.
|
| + EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Then the space and each character of "Can" on its own; all skippable.
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16("C"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16("a"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16("n"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Then the next space, the Khmer characters as one chunk, and each of the two trailing spaces.
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::WideToUTF16(L"\x1791\x17c1"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_FALSE(iter.Advance());
|
| +}
|
| +
|
| +// Runs a BreakIterator built from the "km" rule set over mixed-script text:
|
| +// only the Khmer chunk is IS_WORD_BREAK; everything else is IS_SKIPPABLE_WORD.
|
| +TEST(SpellcheckWordIteratorTest, FindSkippableWordsKhmer) {
|
| + // The string "Ми ទេzoo. ," which contains Russian, Khmer, and English
|
| + // characters, in that order, followed by a period, a space, and a comma.
|
| + base::string16 text(base::WideToUTF16(L"\x041C\x0438 \x1791\x17c1zoo. ,"));
|
| + BreakIterator iter(text, GetRulesForLanguage("km"));
|
| + ASSERT_TRUE(iter.Init());
|
| +
|
| + EXPECT_TRUE(iter.Advance());
|
| + // First each Russian character on its own, then the space; all skippable.
|
| + EXPECT_EQ(base::WideToUTF16(L"\x041C"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::WideToUTF16(L"\x0438"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Then the two Khmer characters together, the only chunk reported as a word.
|
| + EXPECT_EQ(base::WideToUTF16(L"\x1791\x17c1"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Then each character of "zoo" on its own; all skippable.
|
| + EXPECT_EQ(base::UTF8ToUTF16("z"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16("o"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16("o"), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + // Finally the period, space, and comma, each skippable; then Advance() returns false.
|
| + EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_TRUE(iter.Advance());
|
| + EXPECT_EQ(base::UTF8ToUTF16(","), iter.GetString());
|
| + EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
|
| + EXPECT_FALSE(iter.Advance());
|
| +}
|
|
|