Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6649)

Unified Diff: chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc

Issue 1272683002: Creates BreakIterator::GetWordBreakStatus. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Made new function, added tests. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« base/i18n/break_iterator.h ('K') | « base/i18n/break_iterator.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
index 08809ded8e7905613cfbdafbd361a970d7d5a333..bd9bd6e84254c6ccc25efa9ca72a5503f07be737 100644
--- a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
+++ b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
@@ -6,12 +6,15 @@
#include <vector>
#include "base/format_macros.h"
+#include "base/i18n/break_iterator.h"
#include "base/strings/string_split.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
#include "testing/gtest/include/gtest/gtest.h"
+using base::i18n::BreakIterator;
+
namespace {
struct TestCase {
@@ -22,6 +25,12 @@ struct TestCase {
} // namespace
+base::string16 GetRulesForLanguage(const std::string& language) {
please use gerrit instead 2015/08/07 17:16:59 Put this in anonymous namespace.
Julius 2015/08/07 20:30:04 Done.
+ SpellcheckCharAttribute attribute;
+ attribute.SetDefaultLanguage(language);
+ return attribute.GetRuleSet(true);
+}
+
// Tests whether or not our SpellcheckWordIterator can extract only words used
// by the specified language from a multi-language text.
TEST(SpellcheckWordIteratorTest, SplitWord) {
@@ -295,3 +304,134 @@ TEST(SpellcheckWordIteratorTest, Initialization) {
EXPECT_FALSE(iterator.Initialize(&attributes, true));
}
}
+
+// Make sure that when not in RULE_BASED or BREAK_WORD mode we're getting
+// IS_NOT_WORD_BREAK.
+TEST(SpellcheckWordIteratorTest, BreakLine) {
+ // Text: "foo", Khmer (U+1791 U+17C1), "Can", Russian (U+041C U+0438), "...".
please use gerrit instead 2015/08/07 17:16:59 Put actual text into the comment.
Julius 2015/08/07 20:30:04 Done.
+ base::string16 text(
+ base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438..."));
+ BreakIterator iter(text, BreakIterator::BREAK_LINE);
+ ASSERT_TRUE(iter.Init());
+
+ EXPECT_TRUE(iter.Advance());
+ // Finds "foo".
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the space, the Khmer characters, and the next space.
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds "Can".
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the Russian characters and periods.
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
+ EXPECT_FALSE(iter.Advance());
+}
+
+// Check that different character set combinations properly find word breaks and
+// skippable characters.
+TEST(SpellcheckWordIteratorTest, FindSkippableWordsEnglish) {
+ // Text: "foo", Khmer (U+1791 U+17C1), "Can", Russian (U+041C U+0438), "...".
+ base::string16 text(
+ base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438..."));
+ BreakIterator iter(text, GetRulesForLanguage("en-US"));
+ ASSERT_TRUE(iter.Init());
+
+ EXPECT_TRUE(iter.Advance());
+ // Finds "foo".
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the space and then the Khmer characters.
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the next space and "Can".
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the next space and each Russian character.
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the periods at the end.
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_FALSE(iter.Advance());
+}
+
+TEST(SpellcheckWordIteratorTest, FindSkippableWordsRussian) {
+ // Text containing punctuation, then Russian, English, and Khmer characters.
+ base::string16 text(base::WideToUTF16(L".;\x041C\x0438 Can \x1791\x17c1 "));
+ BreakIterator iter(text, GetRulesForLanguage("ru-RU"));
+ ASSERT_TRUE(iter.Init());
+
+ EXPECT_TRUE(iter.Advance());
+ // Finds the period and semicolon.
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ // Finds all the Russian characters.
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the space and "Can".
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the next space, the Khmer characters, and the last two spaces.
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_FALSE(iter.Advance());
+}
+
+TEST(SpellcheckWordIteratorTest, FindSkippableWordsKhmer) {
+ // Text containing Russian, Khmer, and English characters, then punctuation.
+ base::string16 text(base::WideToUTF16(L"\x041C\x0438 \x1791\x17c1zoo. ,"));
+ BreakIterator iter(text, GetRulesForLanguage("km"));
+ ASSERT_TRUE(iter.Init());
+
+ EXPECT_TRUE(iter.Advance());
+ // Finds each Russian character and the space.
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ // Finds all the Khmer characters.
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
+ EXPECT_TRUE(iter.Advance());
+ // Finds each character in "zoo".
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ // Finds the period, space, and comma.
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
+ EXPECT_FALSE(iter.Advance());
+}
« base/i18n/break_iterator.h ('K') | « base/i18n/break_iterator.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698