Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(45)

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc

Issue 1272683002: Creates BreakIterator::GetWordBreakStatus. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Updated Khmer tests and ASCII-fied comments. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/i18n/break_iterator_unittest.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <string> 5 #include <string>
6 #include <vector> 6 #include <vector>
7 7
8 #include "base/format_macros.h" 8 #include "base/format_macros.h"
9 #include "base/i18n/break_iterator.h"
9 #include "base/strings/string_split.h" 10 #include "base/strings/string_split.h"
10 #include "base/strings/stringprintf.h" 11 #include "base/strings/stringprintf.h"
11 #include "base/strings/utf_string_conversions.h" 12 #include "base/strings/utf_string_conversions.h"
12 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" 13 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
13 #include "testing/gtest/include/gtest/gtest.h" 14 #include "testing/gtest/include/gtest/gtest.h"
14 15
16 using base::i18n::BreakIterator;
17
15 namespace { 18 namespace {
16 19
// Plain aggregate with three fields: a language string, an allow_contraction
// flag, and a wide-character expected_words string.
// NOTE(review): the code that consumes this struct lies in the elided part of
// this diff view; presumably there is one entry per language under test, with
// expected_words holding the words the iterator should extract -- confirm.
17 struct TestCase { 20 struct TestCase {
18 const char* language; 21 const char* language;
19 bool allow_contraction; 22 bool allow_contraction;
20 const wchar_t* expected_words; 23 const wchar_t* expected_words;
21 }; 24 };
22 25
// Test helper: creates a SpellcheckCharAttribute, sets |language| as its
// default language, and returns whatever GetRuleSet(true) produces for it.
// The tests below pass the returned string16 to BreakIterator as its rules.
// NOTE(review): the meaning of the `true` argument is not visible in this
// view -- presumably it enables contraction handling; confirm against
// SpellcheckCharAttribute::GetRuleSet().
26 base::string16 GetRulesForLanguage(const std::string& language) {
27 SpellcheckCharAttribute attribute;
28 attribute.SetDefaultLanguage(language);
29 return attribute.GetRuleSet(true);
30 }
31
23 } // namespace 32 } // namespace
24 33
25 // Tests whether or not our SpellcheckWordIterator can extract only words used 34 // Tests whether or not our SpellcheckWordIterator can extract only words used
26 // by the specified language from a multi-language text. 35 // by the specified language from a multi-language text.
27 TEST(SpellcheckWordIteratorTest, SplitWord) { 36 TEST(SpellcheckWordIteratorTest, SplitWord) {
28 // An input text. This text includes words of several languages. (Some words 37 // An input text. This text includes words of several languages. (Some words
29 // are not separated with whitespace characters.) Our SpellcheckWordIterator 38 // are not separated with whitespace characters.) Our SpellcheckWordIterator
30 // should extract only the words used by the specified language from this text 39 // should extract only the words used by the specified language from this text
31 // and normalize them so our spell-checker can check their spellings. 40 // and normalize them so our spell-checker can check their spellings.
32 const wchar_t kTestText[] = 41 const wchar_t kTestText[] =
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after
288 } 297 }
289 298
290 // Test initialization fails when no default language is set. 299 // Test initialization fails when no default language is set.
291 { 300 {
292 SpellcheckCharAttribute attributes; 301 SpellcheckCharAttribute attributes;
293 302
294 SpellcheckWordIterator iterator; 303 SpellcheckWordIterator iterator;
295 EXPECT_FALSE(iterator.Initialize(&attributes, true)); 304 EXPECT_FALSE(iterator.Initialize(&attributes, true));
296 } 305 }
297 } 306 }
307
308 // This test uses English rules to check that different character set
309 // combinations properly find word breaks and skippable characters.
310 TEST(SpellcheckWordIteratorTest, FindSkippableWordsEnglish) {
311 // A string containing the English word "foo", followed by two Khmer
312 // characters, the English word "Can", and then two Russian characters and
313 // three periods. Every group is separated from the next by a single space.
314 base::string16 text(
315 base::WideToUTF16(L"foo \x1791\x17C1 Can \x041C\x0438..."));
316 BreakIterator iter(text, GetRulesForLanguage("en-US"));
317 ASSERT_TRUE(iter.Init());
318
319 EXPECT_TRUE(iter.Advance());
320 // Finds "foo", which the English rules report as a word (IS_WORD_BREAK).
321 EXPECT_EQ(base::UTF8ToUTF16("foo"), iter.GetString());
322 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
323 EXPECT_TRUE(iter.Advance());
324 // Finds the space and then both Khmer characters as one skippable segment.
325 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
326 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
327 EXPECT_TRUE(iter.Advance());
328 EXPECT_EQ(base::WideToUTF16(L"\x1791\x17C1"), iter.GetString());
329 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
330 EXPECT_TRUE(iter.Advance());
331 // Finds the next space (skippable) and then "Can" (a word).
332 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
333 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
334 EXPECT_TRUE(iter.Advance());
335 EXPECT_EQ(base::UTF8ToUTF16("Can"), iter.GetString());
336 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
337 EXPECT_TRUE(iter.Advance());
338 // Finds the next space and each Russian character, one per Advance() call.
339 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
340 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
341 EXPECT_TRUE(iter.Advance());
342 EXPECT_EQ(base::WideToUTF16(L"\x041C"), iter.GetString());
343 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
344 EXPECT_TRUE(iter.Advance());
345 EXPECT_EQ(base::WideToUTF16(L"\x0438"), iter.GetString());
346 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
347 EXPECT_TRUE(iter.Advance());
348 // Finds the three periods at the end, one per Advance() call; the final
349 // Advance() must then return false because the text is exhausted.
349 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
350 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
351 EXPECT_TRUE(iter.Advance());
352 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
353 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
354 EXPECT_TRUE(iter.Advance());
355 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
356 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
357 EXPECT_FALSE(iter.Advance());
358 }
359
360 // This test uses Russian rules to check that different character set
361 // combinations properly find word breaks and skippable characters.
362 TEST(SpellcheckWordIteratorTest, FindSkippableWordsRussian) {
363 // A string containing punctuation followed by two Russian characters, the
364 // English word "Can", two Khmer characters, and then TWO trailing spaces. Both trailing spaces are required: the expectations at the end of this test consume two separate space segments before Advance() returns false.
365 base::string16 text(base::WideToUTF16(L".;\x041C\x0438 Can \x1791\x17C1  "));
366 BreakIterator iter(text, GetRulesForLanguage("ru-RU"));
367 ASSERT_TRUE(iter.Init());
368
369 EXPECT_TRUE(iter.Advance());
370 // Finds the period and semicolon, each as its own skippable segment.
371 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
372 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
373 EXPECT_TRUE(iter.Advance());
374 EXPECT_EQ(base::UTF8ToUTF16(";"), iter.GetString());
375 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
376 EXPECT_TRUE(iter.Advance());
377 // Finds both Russian characters together as a single word.
378 EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438"), iter.GetString());
379 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
380 EXPECT_TRUE(iter.Advance());
381 // Finds the space and each character in "Can", one per Advance() call.
382 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
383 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
384 EXPECT_TRUE(iter.Advance());
385 EXPECT_EQ(base::UTF8ToUTF16("C"), iter.GetString());
386 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
387 EXPECT_TRUE(iter.Advance());
388 EXPECT_EQ(base::UTF8ToUTF16("a"), iter.GetString());
389 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
390 EXPECT_TRUE(iter.Advance());
391 EXPECT_EQ(base::UTF8ToUTF16("n"), iter.GetString());
392 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
393 EXPECT_TRUE(iter.Advance());
394 // Finds the next space, the Khmer characters (as one skippable segment), and the last two spaces (one per Advance() call).
395 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
396 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
397 EXPECT_TRUE(iter.Advance());
398 EXPECT_EQ(base::WideToUTF16(L"\x1791\x17C1"), iter.GetString());
399 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
400 EXPECT_TRUE(iter.Advance());
401 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
402 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
403 EXPECT_TRUE(iter.Advance());
404 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
405 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
406 EXPECT_FALSE(iter.Advance());
407 }
408
409 // This test uses Khmer rules to check that different character set combinations
410 // properly find word breaks and skippable characters.
411 TEST(SpellcheckWordIteratorTest, FindSkippableWordsKhmer) {
412 // A string containing two Russian characters followed by two, three, and two
413 // Khmer characters, and then English characters and punctuation. Note that "zoo" follows the last Khmer group with no space in between.
414 base::string16 text(base::WideToUTF16(
415 L"\x041C\x0438 \x178F\x17BE \x179B\x17C4\x1780 \x1798\x1780zoo. ,"));
jungshik at Google 2015/08/12 16:56:52 Ick. Sorry it's not clear to you (and for a post-l
416 BreakIterator iter(text, GetRulesForLanguage("km"));
417 ASSERT_TRUE(iter.Init());
418
419 EXPECT_TRUE(iter.Advance());
420 // Finds each Russian character (one per Advance() call) and the space.
421 EXPECT_EQ(base::WideToUTF16(L"\x041C"), iter.GetString());
422 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
423 EXPECT_TRUE(iter.Advance());
424 EXPECT_EQ(base::WideToUTF16(L"\x0438"), iter.GetString());
425 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
426 EXPECT_TRUE(iter.Advance());
427 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
428 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
429 EXPECT_TRUE(iter.Advance());
430 // Finds the first two Khmer characters as one word, then the space.
431 EXPECT_EQ(base::WideToUTF16(L"\x178F\x17BE"), iter.GetString());
432 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
433 EXPECT_TRUE(iter.Advance());
434 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
435 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
436 EXPECT_TRUE(iter.Advance());
437 // Finds the next three Khmer characters as one word, then the space.
438 EXPECT_EQ(base::WideToUTF16(L"\x179B\x17C4\x1780"), iter.GetString());
439 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
440 EXPECT_TRUE(iter.Advance());
441 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
442 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
443 EXPECT_TRUE(iter.Advance());
444 // Finds the last two Khmer characters as a word; the adjacent "zoo" is not part of it.
445 EXPECT_EQ(base::WideToUTF16(L"\x1798\x1780"), iter.GetString());
446 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
447 EXPECT_TRUE(iter.Advance());
448 // Finds each character in "zoo" as its own skippable segment.
449 EXPECT_EQ(base::UTF8ToUTF16("z"), iter.GetString());
450 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
451 EXPECT_TRUE(iter.Advance());
452 EXPECT_EQ(base::UTF8ToUTF16("o"), iter.GetString());
453 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
454 EXPECT_TRUE(iter.Advance());
455 EXPECT_EQ(base::UTF8ToUTF16("o"), iter.GetString());
456 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
457 EXPECT_TRUE(iter.Advance());
458 // Finds the period, space, and comma; the final Advance() must then return false.
459 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
460 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
461 EXPECT_TRUE(iter.Advance());
462 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
463 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
464 EXPECT_TRUE(iter.Advance());
465 EXPECT_EQ(base::UTF8ToUTF16(","), iter.GetString());
466 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
467 EXPECT_FALSE(iter.Advance());
468 }
OLDNEW
« no previous file with comments | « base/i18n/break_iterator_unittest.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698