OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <string> | 5 #include <string> |
6 #include <vector> | 6 #include <vector> |
7 | 7 |
8 #include "base/format_macros.h" | 8 #include "base/format_macros.h" |
9 #include "base/i18n/break_iterator.h" | |
9 #include "base/strings/string_split.h" | 10 #include "base/strings/string_split.h" |
10 #include "base/strings/stringprintf.h" | 11 #include "base/strings/stringprintf.h" |
11 #include "base/strings/utf_string_conversions.h" | 12 #include "base/strings/utf_string_conversions.h" |
12 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" | 13 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" |
13 #include "testing/gtest/include/gtest/gtest.h" | 14 #include "testing/gtest/include/gtest/gtest.h" |
14 | 15 |
16 using base::i18n::BreakIterator; | |
17 | |
15 namespace { | 18 namespace { |
16 | 19 |
17 struct TestCase { | 20 struct TestCase { |
18 const char* language; | 21 const char* language; |
19 bool allow_contraction; | 22 bool allow_contraction; |
20 const wchar_t* expected_words; | 23 const wchar_t* expected_words; |
21 }; | 24 }; |
22 | 25 |
26 base::string16 GetRulesForLanguage(const std::string& language) { | |
27 SpellcheckCharAttribute attribute; | |
28 attribute.SetDefaultLanguage(language); | |
29 return attribute.GetRuleSet(true); | |
30 } | |
31 | |
23 } // namespace | 32 } // namespace |
24 | 33 |
25 // Tests whether or not our SpellcheckWordIterator can extract only words used | 34 // Tests whether or not our SpellcheckWordIterator can extract only words used |
26 // by the specified language from a multi-language text. | 35 // by the specified language from a multi-language text. |
27 TEST(SpellcheckWordIteratorTest, SplitWord) { | 36 TEST(SpellcheckWordIteratorTest, SplitWord) { |
28 // An input text. This text includes words of several languages. (Some words | 37 // An input text. This text includes words of several languages. (Some words |
29 // are not separated with whitespace characters.) Our SpellcheckWordIterator | 38 // are not separated with whitespace characters.) Our SpellcheckWordIterator |
30 // should extract only the words used by the specified language from this text | 39 // should extract only the words used by the specified language from this text |
31 // and normalize them so our spell-checker can check their spellings. | 40 // and normalize them so our spell-checker can check their spellings. |
32 const wchar_t kTestText[] = | 41 const wchar_t kTestText[] = |
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
288 } | 297 } |
289 | 298 |
290 // Test initialization fails when no default language is set. | 299 // Test initialization fails when no default language is set. |
291 { | 300 { |
292 SpellcheckCharAttribute attributes; | 301 SpellcheckCharAttribute attributes; |
293 | 302 |
294 SpellcheckWordIterator iterator; | 303 SpellcheckWordIterator iterator; |
295 EXPECT_FALSE(iterator.Initialize(&attributes, true)); | 304 EXPECT_FALSE(iterator.Initialize(&attributes, true)); |
296 } | 305 } |
297 } | 306 } |
307 | |
308 // Make sure that when not in RULE_BASED or BREAK_WORD mode we're getting | |
309 // IS_NOT_WORD_BREAK. | |
310 TEST(SpellcheckWordIteratorTest, BreakLine) { | |
please use gerrit instead
2015/08/07 20:53:10
This test should be in base/.
Also add a test for
Julius
2015/08/10 16:06:37
Done.
| |
311 // The string "foo ទេ Can Ми..." which contains English, Khmer, and Russian | |
312 // characters, in that order. | |
313 base::string16 text( | |
314 base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438...")); | |
please use gerrit instead
2015/08/07 20:53:10
Put a newline in there, so that you get one re
Julius
2015/08/10 16:06:37
Well, it's still going to be IS_NOT_WORD_BREAK if
| |
315 BreakIterator iter(text, BreakIterator::BREAK_LINE); | |
316 ASSERT_TRUE(iter.Init()); | |
317 | |
318 EXPECT_TRUE(iter.Advance()); | |
319 // Finds "foo". | |
please use gerrit instead
2015/08/07 20:53:10
Also add this throughout:
EXPECT_EQ(base::WideToUTF
Julius
2015/08/10 16:06:37
Done.
| |
320 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK); | |
please use gerrit instead
2015/08/07 20:53:10
Can you think of a better name for BreakIterator::
Julius
2015/08/10 16:06:37
IS_LINE_OR_CHAR_BREAK seems good.
| |
321 EXPECT_TRUE(iter.Advance()); | |
322 // Finds the space, the Khmer characters, and the next space. | |
323 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK); | |
324 EXPECT_TRUE(iter.Advance()); | |
325 // Finds "Can". | |
326 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK); | |
327 EXPECT_TRUE(iter.Advance()); | |
328 // Finds the Russian characters and periods. | |
329 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_NOT_WORD_BREAK); | |
330 EXPECT_FALSE(iter.Advance()); | |
331 } | |
332 | |
333 // This test uses English rules to check that different character set | |
334 // combinations properly find word breaks and skippable characters. | |
335 TEST(SpellcheckWordIteratorTest, FindSkippableWordsEnglish) { | |
336 // The string "foo ទេ Can Ми..." which contains English, Khmer, and Russian | |
337 // characters, in that order. | |
338 base::string16 text( | |
339 base::WideToUTF16(L"foo \x1791\x17c1 Can \x041C\x0438...")); | |
340 BreakIterator iter(text, GetRulesForLanguage("en-US")); | |
341 ASSERT_TRUE(iter.Init()); | |
342 | |
343 EXPECT_TRUE(iter.Advance()); | |
344 // Finds "foo". | |
345 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK); | |
346 EXPECT_TRUE(iter.Advance()); | |
347 // Finds the space and then the Khmer characters. | |
348 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
349 EXPECT_TRUE(iter.Advance()); | |
350 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
351 EXPECT_TRUE(iter.Advance()); | |
352 // Finds the next space and "Can". | |
353 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
354 EXPECT_TRUE(iter.Advance()); | |
355 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK); | |
356 EXPECT_TRUE(iter.Advance()); | |
357 // Finds the next space and each Russian character. | |
358 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
359 EXPECT_TRUE(iter.Advance()); | |
360 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
361 EXPECT_TRUE(iter.Advance()); | |
362 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
363 EXPECT_TRUE(iter.Advance()); | |
364 // Finds the periods at the end. | |
365 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
366 EXPECT_TRUE(iter.Advance()); | |
367 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
368 EXPECT_TRUE(iter.Advance()); | |
369 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
370 EXPECT_FALSE(iter.Advance()); | |
371 } | |
372 | |
373 // This test uses Russian rules to check that different character set | |
374 // combinations properly find word breaks and skippable characters. | |
375 TEST(SpellcheckWordIteratorTest, FindSkippableWordsRussian) { | |
376 // The string ".;Ми Can ទេ " which contains Russian, English, and Khmer | |
377 // characters, in that order. | |
378 base::string16 text(base::WideToUTF16(L".;\x041C\x0438 Can \x1791\x17c1 ")); | |
379 BreakIterator iter(text, GetRulesForLanguage("ru-RU")); | |
380 ASSERT_TRUE(iter.Init()); | |
381 | |
382 EXPECT_TRUE(iter.Advance()); | |
383 // Finds the period and semicolon. | |
384 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
385 EXPECT_TRUE(iter.Advance()); | |
386 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
387 EXPECT_TRUE(iter.Advance()); | |
388 // Finds all the Russian characters. | |
389 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK); | |
390 EXPECT_TRUE(iter.Advance()); | |
391 // Finds the space and "Can". | |
392 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
393 EXPECT_TRUE(iter.Advance()); | |
394 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
395 EXPECT_TRUE(iter.Advance()); | |
396 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
397 EXPECT_TRUE(iter.Advance()); | |
398 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
399 EXPECT_TRUE(iter.Advance()); | |
400 // Finds the next space, the Khmer characters, and the last two spaces. | |
401 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
402 EXPECT_TRUE(iter.Advance()); | |
403 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
404 EXPECT_TRUE(iter.Advance()); | |
405 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
406 EXPECT_TRUE(iter.Advance()); | |
407 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
408 EXPECT_FALSE(iter.Advance()); | |
409 } | |
410 | |
411 // This test uses Khmer rules to check that different character set combinations | |
412 // properly find word breaks and skippable characters. | |
413 TEST(SpellcheckWordIteratorTest, FindSkippableWordsKhmer) { | |
414 // The string "Ми ទេzoo. ," which contains Russian, Khmer, and English | |
415 // characters, in that order. | |
416 base::string16 text(base::WideToUTF16(L"\x041C\x0438 \x1791\x17c1zoo. ,")); | |
417 BreakIterator iter(text, GetRulesForLanguage("km")); | |
418 ASSERT_TRUE(iter.Init()); | |
419 | |
420 EXPECT_TRUE(iter.Advance()); | |
421 // Finds each Russian character and the space. | |
422 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
423 EXPECT_TRUE(iter.Advance()); | |
424 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
425 EXPECT_TRUE(iter.Advance()); | |
426 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
427 EXPECT_TRUE(iter.Advance()); | |
428 // Finds all the Khmer characters. | |
429 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_WORD_BREAK); | |
430 EXPECT_TRUE(iter.Advance()); | |
431 // Finds each character in "zoo". | |
432 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
433 EXPECT_TRUE(iter.Advance()); | |
434 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
435 EXPECT_TRUE(iter.Advance()); | |
436 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
437 EXPECT_TRUE(iter.Advance()); | |
438 // Finds the period, space, and comma. | |
439 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
440 EXPECT_TRUE(iter.Advance()); | |
441 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
442 EXPECT_TRUE(iter.Advance()); | |
443 EXPECT_TRUE(iter.IsWordBreak() == BreakIterator::IS_SKIPPABLE_WORD); | |
444 EXPECT_FALSE(iter.Advance()); | |
445 } | |
OLD | NEW |