Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(262)

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc

Issue 1272683002: Creates BreakIterator::GetWordBreakStatus. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebase and address comments. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <string> 5 #include <string>
6 #include <vector> 6 #include <vector>
7 7
8 #include "base/format_macros.h" 8 #include "base/format_macros.h"
9 #include "base/i18n/break_iterator.h"
9 #include "base/strings/string_split.h" 10 #include "base/strings/string_split.h"
10 #include "base/strings/stringprintf.h" 11 #include "base/strings/stringprintf.h"
11 #include "base/strings/utf_string_conversions.h" 12 #include "base/strings/utf_string_conversions.h"
12 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" 13 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
13 #include "testing/gtest/include/gtest/gtest.h" 14 #include "testing/gtest/include/gtest/gtest.h"
14 15
16 using base::i18n::BreakIterator;
17
15 namespace { 18 namespace {
16 19
17 struct TestCase { 20 struct TestCase {
18 const char* language; 21 const char* language;
19 bool allow_contraction; 22 bool allow_contraction;
20 const wchar_t* expected_words; 23 const wchar_t* expected_words;
21 }; 24 };
22 25
26 base::string16 GetRulesForLanguage(const std::string& language) {  // Returns the rule set that SpellcheckCharAttribute produces once |language| is its default language.
27 SpellcheckCharAttribute attribute;
28 attribute.SetDefaultLanguage(language);
29 return attribute.GetRuleSet(true);  // NOTE(review): the meaning of `true` is not visible here; presumably allow_contraction -- confirm against GetRuleSet's declaration.
30 }
31
23 } // namespace 32 } // namespace
24 33
25 // Tests whether or not our SpellcheckWordIterator can extract only words used 34 // Tests whether or not our SpellcheckWordIterator can extract only words used
26 // by the specified language from a multi-language text. 35 // by the specified language from a multi-language text.
27 TEST(SpellcheckWordIteratorTest, SplitWord) { 36 TEST(SpellcheckWordIteratorTest, SplitWord) {
28 // An input text. This text includes words of several languages. (Some words 37 // An input text. This text includes words of several languages. (Some words
29 // are not separated with whitespace characters.) Our SpellcheckWordIterator 38 // are not separated with whitespace characters.) Our SpellcheckWordIterator
30 // should extract only the words used by the specified language from this text 39 // should extract only the words used by the specified language from this text
31 // and normalize them so our spell-checker can check their spellings. 40 // and normalize them so our spell-checker can check their spellings.
32 const wchar_t kTestText[] = 41 const wchar_t kTestText[] =
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after
288 } 297 }
289 298
290 // Test initialization fails when no default language is set. 299 // Test initialization fails when no default language is set.
291 { 300 {
292 SpellcheckCharAttribute attributes; 301 SpellcheckCharAttribute attributes;
293 302
294 SpellcheckWordIterator iterator; 303 SpellcheckWordIterator iterator;
295 EXPECT_FALSE(iterator.Initialize(&attributes, true)); 304 EXPECT_FALSE(iterator.Initialize(&attributes, true));
296 } 305 }
297 } 306 }
307
308 // Checks, with the en-US rule set, that BreakIterator reports English words as
309 // IS_WORD_BREAK and every other token in a mixed-script string as IS_SKIPPABLE_WORD.
310 TEST(SpellcheckWordIteratorTest, FindSkippableWordsEnglish) {
311 // The string "foo ទេ Can Ми..." which contains English, Khmer, and Russian
312 // characters, in that order.
313 base::string16 text(
314 base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438..."));
315 BreakIterator iter(text, GetRulesForLanguage("en-US"));
316 ASSERT_TRUE(iter.Init());
317
318 EXPECT_TRUE(iter.Advance());
319 // "foo" is returned whole and is a word break.
320 EXPECT_EQ(base::UTF8ToUTF16("foo"), iter.GetString());
321 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
322 EXPECT_TRUE(iter.Advance());
323 // The space, then the two Khmer characters together, are each one skippable token.
324 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
325 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
326 EXPECT_TRUE(iter.Advance());
327 EXPECT_EQ(base::WideToUTF16(L"\x1791\x17c1"), iter.GetString());
328 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
329 EXPECT_TRUE(iter.Advance());
330 // The next space is skippable; "Can" is returned whole and is a word break.
331 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
332 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
333 EXPECT_TRUE(iter.Advance());
334 EXPECT_EQ(base::UTF8ToUTF16("Can"), iter.GetString());
335 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
336 EXPECT_TRUE(iter.Advance());
337 // The next space is skippable, and each Russian character is its own skippable token.
338 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
339 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
340 EXPECT_TRUE(iter.Advance());
341 EXPECT_EQ(base::WideToUTF16(L"\x041C"), iter.GetString());
342 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
343 EXPECT_TRUE(iter.Advance());
344 EXPECT_EQ(base::WideToUTF16(L"\x0438"), iter.GetString());
345 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
346 EXPECT_TRUE(iter.Advance());
347 // Each of the three trailing periods is a separate skippable token.
348 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
349 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
350 EXPECT_TRUE(iter.Advance());
351 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
352 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
353 EXPECT_TRUE(iter.Advance());
354 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
355 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
356 EXPECT_FALSE(iter.Advance());  // The whole string has been consumed.
357 }
358
359 // Checks, with the ru-RU rule set, that BreakIterator reports Russian words as
360 // IS_WORD_BREAK and every other token in a mixed-script string as IS_SKIPPABLE_WORD.
361 TEST(SpellcheckWordIteratorTest, FindSkippableWordsRussian) {
362 // The string ".;Ми Can ទេ  " (ending in two spaces) which contains Russian,
363 // English, and Khmer characters, in that order.
364 base::string16 text(base::WideToUTF16(L".;\x041C\x0438 Can \x1791\x17c1  "));
365 BreakIterator iter(text, GetRulesForLanguage("ru-RU"));
366 ASSERT_TRUE(iter.Init());
367
368 EXPECT_TRUE(iter.Advance());
369 // The period and the semicolon are each a separate skippable token.
370 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
371 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
372 EXPECT_TRUE(iter.Advance());
373 EXPECT_EQ(base::UTF8ToUTF16(";"), iter.GetString());
374 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
375 EXPECT_TRUE(iter.Advance());
376 // The two Russian characters are returned together and are a word break.
377 EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438"), iter.GetString());
378 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
379 EXPECT_TRUE(iter.Advance());
380 // The space is skippable, and each letter of "Can" is its own skippable token.
381 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
382 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
383 EXPECT_TRUE(iter.Advance());
384 EXPECT_EQ(base::UTF8ToUTF16("C"), iter.GetString());
385 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
386 EXPECT_TRUE(iter.Advance());
387 EXPECT_EQ(base::UTF8ToUTF16("a"), iter.GetString());
388 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
389 EXPECT_TRUE(iter.Advance());
390 EXPECT_EQ(base::UTF8ToUTF16("n"), iter.GetString());
391 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
392 EXPECT_TRUE(iter.Advance());
393 // The next space, the Khmer pair, and each of the two trailing spaces are
393 // skippable tokens; the two trailing spaces are why the literal ends in "  ".
394 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
395 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
396 EXPECT_TRUE(iter.Advance());
397 EXPECT_EQ(base::WideToUTF16(L"\x1791\x17c1"), iter.GetString());
398 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
399 EXPECT_TRUE(iter.Advance());
400 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
401 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
402 EXPECT_TRUE(iter.Advance());
403 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
404 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
405 EXPECT_FALSE(iter.Advance());  // The whole string has been consumed.
406 }
407
408 // Checks, with the km rule set, that BreakIterator reports Khmer words as
409 // IS_WORD_BREAK and every other token in a mixed-script string as IS_SKIPPABLE_WORD.
410 TEST(SpellcheckWordIteratorTest, FindSkippableWordsKhmer) {
411 // The string "Ми ទេzoo. ," which contains Russian, Khmer, and English
412 // characters, in that order.
413 base::string16 text(base::WideToUTF16(L"\x041C\x0438 \x1791\x17c1zoo. ,"));
414 BreakIterator iter(text, GetRulesForLanguage("km"));
415 ASSERT_TRUE(iter.Init());
416
417 EXPECT_TRUE(iter.Advance());
418 // Each Russian character, and then the space, is a separate skippable token.
419 EXPECT_EQ(base::WideToUTF16(L"\x041C"), iter.GetString());
420 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
421 EXPECT_TRUE(iter.Advance());
422 EXPECT_EQ(base::WideToUTF16(L"\x0438"), iter.GetString());
423 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
424 EXPECT_TRUE(iter.Advance());
425 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
426 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
427 EXPECT_TRUE(iter.Advance());
428 // The two Khmer characters are returned together and are a word break.
429 EXPECT_EQ(base::WideToUTF16(L"\x1791\x17c1"), iter.GetString());
430 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
431 EXPECT_TRUE(iter.Advance());
432 // Each letter of "zoo" is its own skippable token, even though it directly
432 // follows the Khmer word with no separator.
433 EXPECT_EQ(base::UTF8ToUTF16("z"), iter.GetString());
434 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
435 EXPECT_TRUE(iter.Advance());
436 EXPECT_EQ(base::UTF8ToUTF16("o"), iter.GetString());
437 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
438 EXPECT_TRUE(iter.Advance());
439 EXPECT_EQ(base::UTF8ToUTF16("o"), iter.GetString());
440 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
441 EXPECT_TRUE(iter.Advance());
442 // The period, the space, and the comma are each a separate skippable token.
443 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
444 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
445 EXPECT_TRUE(iter.Advance());
446 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
447 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
448 EXPECT_TRUE(iter.Advance());
449 EXPECT_EQ(base::UTF8ToUTF16(","), iter.GetString());
450 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
451 EXPECT_FALSE(iter.Advance());  // The whole string has been consumed.
452 }
OLDNEW
« base/i18n/break_iterator_unittest.cc ('K') | « base/i18n/break_iterator_unittest.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698