Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(110)

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc

Issue 1272683002: Creates BreakIterator::GetWordBreakStatus. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Added comments and such. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« base/i18n/break_iterator.cc ('K') | « base/i18n/break_iterator.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <string> 5 #include <string>
6 #include <vector> 6 #include <vector>
7 7
8 #include "base/format_macros.h" 8 #include "base/format_macros.h"
9 #include "base/i18n/break_iterator.h"
9 #include "base/strings/string_split.h" 10 #include "base/strings/string_split.h"
10 #include "base/strings/stringprintf.h" 11 #include "base/strings/stringprintf.h"
11 #include "base/strings/utf_string_conversions.h" 12 #include "base/strings/utf_string_conversions.h"
12 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" 13 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
13 #include "testing/gtest/include/gtest/gtest.h" 14 #include "testing/gtest/include/gtest/gtest.h"
14 15
16 using base::i18n::BreakIterator;
17
15 namespace { 18 namespace {
16 19
17 struct TestCase { 20 struct TestCase {
18 const char* language; 21 const char* language;
19 bool allow_contraction; 22 bool allow_contraction;
20 const wchar_t* expected_words; 23 const wchar_t* expected_words;
21 }; 24 };
22 25
26 base::string16 GetRulesForLanguage(const std::string& language) {
27 SpellcheckCharAttribute attribute;
28 attribute.SetDefaultLanguage(language);
29 return attribute.GetRuleSet(true);
30 }
31
23 } // namespace 32 } // namespace
24 33
25 // Tests whether or not our SpellcheckWordIterator can extract only words used 34 // Tests whether or not our SpellcheckWordIterator can extract only words used
26 // by the specified language from a multi-language text. 35 // by the specified language from a multi-language text.
27 TEST(SpellcheckWordIteratorTest, SplitWord) { 36 TEST(SpellcheckWordIteratorTest, SplitWord) {
28 // An input text. This text includes words of several languages. (Some words 37 // An input text. This text includes words of several languages. (Some words
29 // are not separated with whitespace characters.) Our SpellcheckWordIterator 38 // are not separated with whitespace characters.) Our SpellcheckWordIterator
30 // should extract only the words used by the specified language from this text 39 // should extract only the words used by the specified language from this text
31 // and normalize them so our spell-checker can check their spellings. 40 // and normalize them so our spell-checker can check their spellings.
32 const wchar_t kTestText[] = 41 const wchar_t kTestText[] =
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after
288 } 297 }
289 298
290 // Test initialization fails when no default language is set. 299 // Test initialization fails when no default language is set.
291 { 300 {
292 SpellcheckCharAttribute attributes; 301 SpellcheckCharAttribute attributes;
293 302
294 SpellcheckWordIterator iterator; 303 SpellcheckWordIterator iterator;
295 EXPECT_FALSE(iterator.Initialize(&attributes, true)); 304 EXPECT_FALSE(iterator.Initialize(&attributes, true));
296 } 305 }
297 } 306 }
307
308 // Make sure that when not in RULE_BASED or BREAK_WORD mode we're getting
309 // IS_NOT_WORD_BREAK.
310 TEST(SpellcheckWordIteratorTest, BreakLine) {
please use gerrit instead 2015/08/07 20:53:10 This test should be in base/. Also add a test for
Julius 2015/08/10 16:06:37 Done.
311 // The string "foo ទេ Can Ми..." which contains English, Khmer, and Russian
312 // characters, in that order.
313 base::string16 text(
314 base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438..."));
please use gerrit instead 2015/08/07 20:53:10 Put a newline in there, so that you get one re
Julius 2015/08/10 16:06:37 Well, it's still going to be IS_NOT_WORD_BREAK if
315 BreakIterator iter(text, BreakIterator::BREAK_LINE);
316 ASSERT_TRUE(iter.Init());
317
318 EXPECT_TRUE(iter.Advance());
319 // Finds "foo".
please use gerrit instead 2015/08/07 20:53:10 Also add this throughout: EXPECT_EQ(base::WideToUTF
Julius 2015/08/10 16:06:37 Done.
320 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
please use gerrit instead 2015/08/07 20:53:10 Can you think of a better name for BreakIterator::
Julius 2015/08/10 16:06:37 IS_LINE_OR_CHAR_BREAK seems good.
321 EXPECT_TRUE(iter.Advance());
322 // Finds the space, the Khmer characters, and the next space.
323 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
324 EXPECT_TRUE(iter.Advance());
325 // Finds "Can".
326 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
327 EXPECT_TRUE(iter.Advance());
328 // Finds the Russian characters and periods.
329 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
330 EXPECT_FALSE(iter.Advance());
331 }
332
333 // This test uses English rules to check that different character set
334 // combinations properly find word breaks and skippable characters.
335 TEST(SpellcheckWordIteratorTest, FindSkippableWordsEnglish) {
336 // The string "foo ទេ Can Ми..." which contains English, Khmer, and Russian
337 // characters, in that order.
338 base::string16 text(
339 base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438..."));
340 BreakIterator iter(text, GetRulesForLanguage("en-US"));
341 ASSERT_TRUE(iter.Init());
342
343 EXPECT_TRUE(iter.Advance());
344 // Finds "foo".
345 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
346 EXPECT_TRUE(iter.Advance());
347 // Finds the space and then the Khmer characters.
348 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
349 EXPECT_TRUE(iter.Advance());
350 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
351 EXPECT_TRUE(iter.Advance());
352 // Finds the next space and "Can".
353 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
354 EXPECT_TRUE(iter.Advance());
355 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
356 EXPECT_TRUE(iter.Advance());
357 // Finds the next space and each Russian character.
358 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
359 EXPECT_TRUE(iter.Advance());
360 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
361 EXPECT_TRUE(iter.Advance());
362 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
363 EXPECT_TRUE(iter.Advance());
364 // Finds the periods at the end.
365 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
366 EXPECT_TRUE(iter.Advance());
367 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
368 EXPECT_TRUE(iter.Advance());
369 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
370 EXPECT_FALSE(iter.Advance());
371 }
372
373 // This test uses Russian rules to check that different character set
374 // combinations properly find word breaks and skippable characters.
375 TEST(SpellcheckWordIteratorTest, FindSkippableWordsRussian) {
376 // The string ".;Ми Can ទេ " which contains Russian, English, and Khmer
377 // characters, in that order.
378 base::string16 text(base::WideToUTF16(L".;\x041C\x0438 Can \x1791\x17c1 "));
379 BreakIterator iter(text, GetRulesForLanguage("ru-RU"));
380 ASSERT_TRUE(iter.Init());
381
382 EXPECT_TRUE(iter.Advance());
383 // Finds the period and semicolon.
384 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
385 EXPECT_TRUE(iter.Advance());
386 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
387 EXPECT_TRUE(iter.Advance());
388 // Finds all the Russian characters.
389 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
390 EXPECT_TRUE(iter.Advance());
391 // Finds the space and "Can".
392 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
393 EXPECT_TRUE(iter.Advance());
394 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
395 EXPECT_TRUE(iter.Advance());
396 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
397 EXPECT_TRUE(iter.Advance());
398 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
399 EXPECT_TRUE(iter.Advance());
400 // Finds the next space, the Khmer characters, and the last two spaces.
401 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
402 EXPECT_TRUE(iter.Advance());
403 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
404 EXPECT_TRUE(iter.Advance());
405 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
406 EXPECT_TRUE(iter.Advance());
407 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
408 EXPECT_FALSE(iter.Advance());
409 }
410
411 // This test uses Khmer rules to check that different character set combinations
412 // properly find word breaks and skippable characters.
413 TEST(SpellcheckWordIteratorTest, FindSkippableWordsKhmer) {
414 // The string "Ми ទេzoo. ," which contains Russian, Khmer, and English
415 // characters, in that order.
416 base::string16 text(base::WideToUTF16(L"\x041C\x0438 \x1791\x17c1zoo. ,"));
417 BreakIterator iter(text, GetRulesForLanguage("km"));
418 ASSERT_TRUE(iter.Init());
419
420 EXPECT_TRUE(iter.Advance());
421 // Finds each Russian character and the space.
422 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
423 EXPECT_TRUE(iter.Advance());
424 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
425 EXPECT_TRUE(iter.Advance());
426 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
427 EXPECT_TRUE(iter.Advance());
428 // Finds all the Khmer characters.
429 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
430 EXPECT_TRUE(iter.Advance());
431 // Finds each character in "zoo".
432 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
433 EXPECT_TRUE(iter.Advance());
434 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
435 EXPECT_TRUE(iter.Advance());
436 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
437 EXPECT_TRUE(iter.Advance());
438 // Finds the period, space, and comma.
439 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
440 EXPECT_TRUE(iter.Advance());
441 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
442 EXPECT_TRUE(iter.Advance());
443 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
444 EXPECT_FALSE(iter.Advance());
445 }
OLDNEW
« base/i18n/break_iterator.cc ('K') | « base/i18n/break_iterator.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698