Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(257)

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc

Issue 1272683002: Creates BreakIterator::GetWordBreakStatus. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Made new function, added tests. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« base/i18n/break_iterator.h ('K') | « base/i18n/break_iterator.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <string> 5 #include <string>
6 #include <vector> 6 #include <vector>
7 7
8 #include "base/format_macros.h" 8 #include "base/format_macros.h"
9 #include "base/i18n/break_iterator.h"
9 #include "base/strings/string_split.h" 10 #include "base/strings/string_split.h"
10 #include "base/strings/stringprintf.h" 11 #include "base/strings/stringprintf.h"
11 #include "base/strings/utf_string_conversions.h" 12 #include "base/strings/utf_string_conversions.h"
12 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" 13 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
13 #include "testing/gtest/include/gtest/gtest.h" 14 #include "testing/gtest/include/gtest/gtest.h"
14 15
16 using base::i18n::BreakIterator;
17
15 namespace { 18 namespace {
16 19
17 struct TestCase { 20 struct TestCase {
18 const char* language; 21 const char* language;
19 bool allow_contraction; 22 bool allow_contraction;
20 const wchar_t* expected_words; 23 const wchar_t* expected_words;
21 }; 24 };
22 25
23 } // namespace 26 } // namespace
24 27
28 base::string16 GetRulesForLanguage(const std::string& language) {
please use gerrit instead 2015/08/07 17:16:59 Put this in anonymous namespace.
Julius 2015/08/07 20:30:04 Done.
29 SpellcheckCharAttribute attribute;
30 attribute.SetDefaultLanguage(language);
31 return attribute.GetRuleSet(true);
32 }
33
25 // Tests whether or not our SpellcheckWordIterator can extract only words used 34 // Tests whether or not our SpellcheckWordIterator can extract only words used
26 // by the specified language from a multi-language text. 35 // by the specified language from a multi-language text.
27 TEST(SpellcheckWordIteratorTest, SplitWord) { 36 TEST(SpellcheckWordIteratorTest, SplitWord) {
28 // An input text. This text includes words of several languages. (Some words 37 // An input text. This text includes words of several languages. (Some words
29 // are not separated with whitespace characters.) Our SpellcheckWordIterator 38 // are not separated with whitespace characters.) Our SpellcheckWordIterator
30 // should extract only the words used by the specified language from this text 39 // should extract only the words used by the specified language from this text
31 // and normalize them so our spell-checker can check their spellings. 40 // and normalize them so our spell-checker can check their spellings.
32 const wchar_t kTestText[] = 41 const wchar_t kTestText[] =
33 // Graphic characters 42 // Graphic characters
34 L"!@#$%^&*()" 43 L"!@#$%^&*()"
(...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after
288 } 297 }
289 298
290 // Test initialization fails when no default language is set. 299 // Test initialization fails when no default language is set.
291 { 300 {
292 SpellcheckCharAttribute attributes; 301 SpellcheckCharAttribute attributes;
293 302
294 SpellcheckWordIterator iterator; 303 SpellcheckWordIterator iterator;
295 EXPECT_FALSE(iterator.Initialize(&attributes, true)); 304 EXPECT_FALSE(iterator.Initialize(&attributes, true));
296 } 305 }
297 } 306 }
307
308 // Make sure that when not in RULE_BASED or BREAK_WORD mode we're getting
309 // IS_NOT_WORD_BREAK.
310 TEST(SpellcheckWordIteratorTest, BreakLine) {
311 // A string containing English, Khmer, and Russian characters.
please use gerrit instead 2015/08/07 17:16:59 Put actual text into the comment.
Julius 2015/08/07 20:30:04 Done.
312 base::string16 text(
313 base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438..."));
314 BreakIterator iter(text, BreakIterator::BREAK_LINE);
315 ASSERT_TRUE(iter.Init());
316
317 EXPECT_TRUE(iter.Advance());
318 // Finds "foo".
319 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
320 EXPECT_TRUE(iter.Advance());
321 // Finds the space, the Khmer characters, and the next space.
322 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
323 EXPECT_TRUE(iter.Advance());
324 // Finds "Can".
325 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
326 EXPECT_TRUE(iter.Advance());
327 // Finds the Russian characters and periods.
328 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK);
329 EXPECT_FALSE(iter.Advance());
330 }
331
332 // Check that different character set combinations properly find word breaks and
333 // skippable characters.
334 TEST(SpellcheckWordIteratorTest, FindSkippableWordsEnglish) {
335 // A string containing English, Khmer, and Russian characters.
336 base::string16 text(
337 base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438..."));
338 BreakIterator iter(text, GetRulesForLanguage("en-US"));
339 ASSERT_TRUE(iter.Init());
340
341 EXPECT_TRUE(iter.Advance());
342 // Finds "foo".
343 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
344 EXPECT_TRUE(iter.Advance());
345 // Finds the space and then the Khmer characters.
346 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
347 EXPECT_TRUE(iter.Advance());
348 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
349 EXPECT_TRUE(iter.Advance());
350 // Finds the next space and "Can".
351 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
352 EXPECT_TRUE(iter.Advance());
353 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
354 EXPECT_TRUE(iter.Advance());
355 // Finds the next space and each Russian character.
356 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
357 EXPECT_TRUE(iter.Advance());
358 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
359 EXPECT_TRUE(iter.Advance());
360 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
361 EXPECT_TRUE(iter.Advance());
362 // Finds the periods at the end.
363 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
364 EXPECT_TRUE(iter.Advance());
365 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
366 EXPECT_TRUE(iter.Advance());
367 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
368 EXPECT_FALSE(iter.Advance());
369 }
370
371 TEST(SpellcheckWordIteratorTest, FindSkippableWordsRussian) {
372 // Text containing Russian, English, and Khmer characters.
373 base::string16 text(base::WideToUTF16(L".;\x041C\x0438 Can \x1791\x17c1 "));
374 BreakIterator iter(text, GetRulesForLanguage("ru-RU"));
375 ASSERT_TRUE(iter.Init());
376
377 EXPECT_TRUE(iter.Advance());
378 // Finds the period and semicolon.
379 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
380 EXPECT_TRUE(iter.Advance());
381 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
382 EXPECT_TRUE(iter.Advance());
383 // Finds all the Russian characters.
384 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
385 EXPECT_TRUE(iter.Advance());
386 // Finds the space and each character in "Can".
387 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
388 EXPECT_TRUE(iter.Advance());
389 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
390 EXPECT_TRUE(iter.Advance());
391 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
392 EXPECT_TRUE(iter.Advance());
393 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
394 EXPECT_TRUE(iter.Advance());
395 // Finds the next space, the Khmer characters, and the last two spaces.
396 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
397 EXPECT_TRUE(iter.Advance());
398 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
399 EXPECT_TRUE(iter.Advance());
400 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
401 EXPECT_TRUE(iter.Advance());
402 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
403 EXPECT_FALSE(iter.Advance());
404 }
405
406 TEST(SpellcheckWordIteratorTest, FindSkippableWordsKhmer) {
407 // Text containing Russian, Khmer, and English characters.
408 base::string16 text(base::WideToUTF16(L"\x041C\x0438 \x1791\x17c1zoo. ,"));
409 BreakIterator iter(text, GetRulesForLanguage("km"));
410 ASSERT_TRUE(iter.Init());
411
412 EXPECT_TRUE(iter.Advance());
413 // Finds each Russian character and the space.
414 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
415 EXPECT_TRUE(iter.Advance());
416 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
417 EXPECT_TRUE(iter.Advance());
418 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
419 EXPECT_TRUE(iter.Advance());
420 // Finds all the Khmer characters.
421 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK);
422 EXPECT_TRUE(iter.Advance());
423 // Finds each character in "zoo".
424 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
425 EXPECT_TRUE(iter.Advance());
426 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
427 EXPECT_TRUE(iter.Advance());
428 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
429 EXPECT_TRUE(iter.Advance());
430 // Finds the period, space, and comma.
431 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
432 EXPECT_TRUE(iter.Advance());
433 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
434 EXPECT_TRUE(iter.Advance());
435 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD);
436 EXPECT_FALSE(iter.Advance());
437 }
OLDNEW
« base/i18n/break_iterator.h ('K') | « base/i18n/break_iterator.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698