Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <stddef.h> | 5 #include <stddef.h> |
| 6 | 6 |
| 7 #include <string> | 7 #include <string> |
| 8 #include <vector> | 8 #include <vector> |
| 9 | 9 |
| 10 #include "base/format_macros.h" | 10 #include "base/format_macros.h" |
| 11 #include "base/i18n/break_iterator.h" | 11 #include "base/i18n/break_iterator.h" |
| 12 #include "base/macros.h" | 12 #include "base/macros.h" |
| 13 #include "base/strings/string_split.h" | 13 #include "base/strings/string_split.h" |
| 14 #include "base/strings/stringprintf.h" | 14 #include "base/strings/stringprintf.h" |
| 15 #include "base/strings/utf_string_conversions.h" | 15 #include "base/strings/utf_string_conversions.h" |
| 16 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" | 16 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" |
| 17 #include "testing/gtest/include/gtest/gtest.h" | 17 #include "testing/gtest/include/gtest/gtest.h" |
| 18 | 18 |
| 19 using base::i18n::BreakIterator; | 19 using base::i18n::BreakIterator; |
| 20 using WordIteratorStatus = SpellcheckWordIterator::WordIteratorStatus; | |
| 20 | 21 |
| 21 namespace { | 22 namespace { |
| 22 | 23 |
| 23 struct TestCase { | 24 struct TestCase { |
| 24 const char* language; | 25 const char* language; |
| 25 bool allow_contraction; | 26 bool allow_contraction; |
| 26 const wchar_t* expected_words; | 27 const wchar_t* expected_words; |
| 27 }; | 28 }; |
| 28 | 29 |
| 29 base::string16 GetRulesForLanguage(const std::string& language) { | 30 base::string16 GetRulesForLanguage(const std::string& language) { |
| 30 SpellcheckCharAttribute attribute; | 31 SpellcheckCharAttribute attribute; |
| 31 attribute.SetDefaultLanguage(language); | 32 attribute.SetDefaultLanguage(language); |
| 32 return attribute.GetRuleSet(true); | 33 return attribute.GetRuleSet(true); |
| 33 } | 34 } |
| 34 | 35 |
| 36 WordIteratorStatus GetNextNonSkippableWord(SpellcheckWordIterator* iterator, | |
| 37 base::string16* word_string, | |
| 38 int* word_start, | |
| 39 int* word_length) { | |
| 40 WordIteratorStatus status = SpellcheckWordIterator::IS_SKIPPABLE; | |
| 41 while (status == SpellcheckWordIterator::IS_SKIPPABLE) | |
| 42 status = iterator->GetNextWord(word_string, word_start, word_length); | |
|
Nico (2016/01/06 01:38:26): With a do-while loop you don't need to initialize `status`.
groby-ooo-7-16 (2016/01/06 01:48:37): True, but it's one more line :) See below:
Status
Nico (2016/01/06 02:36:16): <s>More horrible</s> You don't need the parens:
| |
| 43 return status; | |
| 44 } | |
| 45 | |
| 35 } // namespace | 46 } // namespace |
| 36 | 47 |
| 37 // Tests whether or not our SpellcheckWordIterator can extract words used by the | 48 // Tests whether or not our SpellcheckWordIterator can extract words used by the |
| 38 // specified language from a multi-language text. | 49 // specified language from a multi-language text. |
| 39 TEST(SpellcheckWordIteratorTest, SplitWord) { | 50 TEST(SpellcheckWordIteratorTest, SplitWord) { |
| 40 // An input text. This text includes words of several languages. (Some words | 51 // An input text. This text includes words of several languages. (Some words |
| 41 // are not separated with whitespace characters.) Our SpellcheckWordIterator | 52 // are not separated with whitespace characters.) Our SpellcheckWordIterator |
| 42 // should extract the words used by the specified language from this text and | 53 // should extract the words used by the specified language from this text and |
| 43 // normalize them so our spell-checker can check their spellings. If | 54 // normalize them so our spell-checker can check their spellings. If |
| 44 // characters are found that are not from the specified language the test | 55 // characters are found that are not from the specified language the test |
| (...skipping 92 matching lines...) | |
| 137 SpellcheckWordIterator iterator; | 148 SpellcheckWordIterator iterator; |
| 138 EXPECT_TRUE(iterator.Initialize(&attributes, | 149 EXPECT_TRUE(iterator.Initialize(&attributes, |
| 139 kTestCases[i].allow_contraction)); | 150 kTestCases[i].allow_contraction)); |
| 140 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length())); | 151 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length())); |
| 141 | 152 |
| 142 std::vector<base::string16> expected_words = base::SplitString( | 153 std::vector<base::string16> expected_words = base::SplitString( |
| 143 base::WideToUTF16(kTestCases[i].expected_words), | 154 base::WideToUTF16(kTestCases[i].expected_words), |
| 144 base::string16(1, ' '), base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); | 155 base::string16(1, ' '), base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); |
| 145 | 156 |
| 146 base::string16 actual_word; | 157 base::string16 actual_word; |
| 147 int actual_start, actual_end; | 158 int actual_start, actual_len; |
| 148 size_t index = 0; | 159 size_t index = 0; |
| 149 for (SpellcheckWordIterator::WordIteratorStatus status = | 160 for (SpellcheckWordIterator::WordIteratorStatus status = |
| 150 iterator.GetNextWord(&actual_word, &actual_start, &actual_end); | 161 iterator.GetNextWord(&actual_word, &actual_start, &actual_len); |
| 151 status != SpellcheckWordIterator::IS_END_OF_TEXT; | 162 status != SpellcheckWordIterator::IS_END_OF_TEXT; |
| 152 status = | 163 status = |
| 153 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) { | 164 iterator.GetNextWord(&actual_word, &actual_start, &actual_len)) { |
| 154 if (status == SpellcheckWordIterator::WordIteratorStatus::IS_SKIPPABLE) | 165 if (status == SpellcheckWordIterator::WordIteratorStatus::IS_SKIPPABLE) |
| 155 continue; | 166 continue; |
| 156 | 167 |
| 157 EXPECT_TRUE(index < expected_words.size()); | 168 EXPECT_TRUE(index < expected_words.size()); |
| 158 if (index < expected_words.size()) | 169 if (index < expected_words.size()) |
| 159 EXPECT_EQ(expected_words[index], actual_word); | 170 EXPECT_EQ(expected_words[index], actual_word); |
| 160 ++index; | 171 ++index; |
| 161 } | 172 } |
| 162 } | 173 } |
| 163 } | 174 } |
| 164 | 175 |
| 165 // Tests whether our SpellcheckWordIterator extracts an empty word without | 176 // Tests whether our SpellcheckWordIterator extracts an empty word without |
| 166 // getting stuck in an infinite loop when inputting a Khmer text. (This is a | 177 // getting stuck in an infinite loop when inputting a Khmer text. (This is a |
| 167 // regression test for Issue 46278.) | 178 // regression test for Issue 46278.) |
| 168 TEST(SpellcheckWordIteratorTest, RuleSetConsistency) { | 179 TEST(SpellcheckWordIteratorTest, RuleSetConsistency) { |
| 169 SpellcheckCharAttribute attributes; | 180 SpellcheckCharAttribute attributes; |
| 170 attributes.SetDefaultLanguage("en-US"); | 181 attributes.SetDefaultLanguage("en-US"); |
| 171 | 182 |
| 172 const wchar_t kTestText[] = L"\x1791\x17c1\x002e"; | 183 const wchar_t kTestText[] = L"\x1791\x17c1\x002e"; |
| 173 base::string16 input(base::WideToUTF16(kTestText)); | 184 base::string16 input(base::WideToUTF16(kTestText)); |
| 174 | 185 |
| 175 SpellcheckWordIterator iterator; | 186 SpellcheckWordIterator iterator; |
| 176 EXPECT_TRUE(iterator.Initialize(&attributes, true)); | 187 EXPECT_TRUE(iterator.Initialize(&attributes, true)); |
| 177 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length())); | 188 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length())); |
| 178 | 189 |
| 179 // When SpellcheckWordIterator uses an inconsistent ICU ruleset, the following | 190 // When SpellcheckWordIterator uses an inconsistent ICU ruleset, the following |
| 180 // iterator.GetNextWord() calls get stuck in an infinite loop. Therefore, this | 191 // iterator.GetNextWord() calls get stuck in an infinite loop. Therefore, this |
| 181 // test succeeds if this call returns without timeouts. | 192 // test succeeds if this call returns without timeouts. |
| 182 base::string16 actual_word; | 193 base::string16 actual_word; |
| 183 int actual_start, actual_end; | 194 int actual_start, actual_len; |
| 184 SpellcheckWordIterator::WordIteratorStatus status; | 195 WordIteratorStatus status = GetNextNonSkippableWord( |
| 185 for (status = iterator.GetNextWord(&actual_word, &actual_start, &actual_end); | 196 &iterator, &actual_word, &actual_start, &actual_len); |
| 186 status == SpellcheckWordIterator::IS_SKIPPABLE; | |
| 187 status = | |
| 188 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) { | |
| 189 continue; | |
| 190 } | |
| 191 | 197 |
| 192 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_END_OF_TEXT, status); | 198 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_END_OF_TEXT, status); |
| 193 EXPECT_EQ(0, actual_start); | 199 EXPECT_EQ(0, actual_start); |
| 194 EXPECT_EQ(0, actual_end); | 200 EXPECT_EQ(0, actual_len); |
| 195 } | 201 } |
| 196 | 202 |
| 197 // Verify our SpellcheckWordIterator can treat ASCII numbers as word characters | 203 // Verify our SpellcheckWordIterator can treat ASCII numbers as word characters |
| 198 // on LTR languages. On the other hand, it should not treat ASCII numbers as | 204 // on LTR languages. On the other hand, it should not treat ASCII numbers as |
| 199 // word characters on RTL languages because they change the text direction from | 205 // word characters on RTL languages because they change the text direction from |
| 200 // RTL to LTR. | 206 // RTL to LTR. |
| 201 TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) { | 207 TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) { |
| 202 // A set of a language, a dummy word, and a text direction used in this test. | 208 // A set of a language, a dummy word, and a text direction used in this test. |
| 203 // For each language, this test splits a dummy word, which consists of ASCII | 209 // For each language, this test splits a dummy word, which consists of ASCII |
| 204 // numbers and an alphabet of the language, into words. When ASCII numbers are | 210 // numbers and an alphabet of the language, into words. When ASCII numbers are |
| (...skipping 37 matching lines...) | |
| 242 | 248 |
| 243 SpellcheckCharAttribute attributes; | 249 SpellcheckCharAttribute attributes; |
| 244 attributes.SetDefaultLanguage(kTestCases[i].language); | 250 attributes.SetDefaultLanguage(kTestCases[i].language); |
| 245 | 251 |
| 246 base::string16 input_word(base::WideToUTF16(kTestCases[i].text)); | 252 base::string16 input_word(base::WideToUTF16(kTestCases[i].text)); |
| 247 SpellcheckWordIterator iterator; | 253 SpellcheckWordIterator iterator; |
| 248 EXPECT_TRUE(iterator.Initialize(&attributes, true)); | 254 EXPECT_TRUE(iterator.Initialize(&attributes, true)); |
| 249 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length())); | 255 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length())); |
| 250 | 256 |
| 251 base::string16 actual_word; | 257 base::string16 actual_word; |
| 252 int actual_start, actual_end; | 258 int actual_start, actual_len; |
| 253 SpellcheckWordIterator::WordIteratorStatus status; | 259 WordIteratorStatus status = GetNextNonSkippableWord( |
| 254 for (status = | 260 &iterator, &actual_word, &actual_start, &actual_len); |
| 255 iterator.GetNextWord(&actual_word, &actual_start, &actual_end); | |
| 256 status == SpellcheckWordIterator::IS_SKIPPABLE; | |
| 257 status = | |
| 258 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) { | |
| 259 continue; | |
| 260 } | |
| 261 | 261 |
| 262 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status); | 262 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status); |
| 263 if (kTestCases[i].left_to_right) | 263 if (kTestCases[i].left_to_right) |
| 264 EXPECT_EQ(input_word, actual_word); | 264 EXPECT_EQ(input_word, actual_word); |
| 265 else | 265 else |
| 266 EXPECT_NE(input_word, actual_word); | 266 EXPECT_NE(input_word, actual_word); |
| 267 } | 267 } |
| 268 } | 268 } |
| 269 | 269 |
| 270 // Vertify SpellcheckWordIterator treats typographical apostrophe as a part of | 270 // Verify SpellcheckWordIterator treats typographical apostrophe as a part of |
| 271 // the word. | 271 // the word. |
| 272 TEST(SpellcheckWordIteratorTest, TypographicalApostropheIsPartOfWord) { | 272 TEST(SpellcheckWordIteratorTest, TypographicalApostropheIsPartOfWord) { |
| 273 static const struct { | 273 static const struct { |
| 274 const char* language; | 274 const char* language; |
| 275 const wchar_t* word; | 275 const wchar_t* input; |
| 276 const wchar_t* expected; | |
| 276 } kTestCases[] = { | 277 } kTestCases[] = { |
| 277 // Typewriter apostrophe: | 278 // Typewriter apostrophe: |
| 278 { | 279 {"en-AU", L"you're", L"you're"}, |
| 279 "en-AU", L"you're" | 280 {"en-CA", L"you're", L"you're"}, |
| 280 }, { | 281 {"en-GB", L"you're", L"you're"}, |
| 281 "en-CA", L"you're" | 282 {"en-US", L"you're", L"you're"}, |
| 282 }, { | 283 {"en-US", L"!!!!you're", L"you're"}, |
| 283 "en-GB", L"you're" | 284 // Typographical apostrophe: |
| 284 }, { | 285 {"en-AU", L"you\x2019re", L"you\x2019re"}, |
| 285 "en-US", L"you're" | 286 {"en-CA", L"you\x2019re", L"you\x2019re"}, |
| 286 }, | 287 {"en-GB", L"you\x2019re", L"you\x2019re"}, |
| 287 // Typographical apostrophe: | 288 {"en-US", L"you\x2019re", L"you\x2019re"}, |
| 288 { | 289 {"en-US", L"....you\x2019re", L"you\x2019re"}, |
| 289 "en-AU", L"you\x2019re" | |
| 290 }, { | |
| 291 "en-CA", L"you\x2019re" | |
| 292 }, { | |
| 293 "en-GB", L"you\x2019re" | |
| 294 }, { | |
| 295 "en-US", L"you\x2019re" | |
| 296 }, | |
| 297 }; | 290 }; |
| 298 | 291 |
| 299 for (size_t i = 0; i < arraysize(kTestCases); ++i) { | 292 for (size_t i = 0; i < arraysize(kTestCases); ++i) { |
| 300 SpellcheckCharAttribute attributes; | 293 SpellcheckCharAttribute attributes; |
| 301 attributes.SetDefaultLanguage(kTestCases[i].language); | 294 attributes.SetDefaultLanguage(kTestCases[i].language); |
| 302 | 295 |
| 303 base::string16 input_word(base::WideToUTF16(kTestCases[i].word)); | 296 base::string16 input_word(base::WideToUTF16(kTestCases[i].input)); |
| 297 base::string16 expected_word(base::WideToUTF16(kTestCases[i].expected)); | |
| 304 SpellcheckWordIterator iterator; | 298 SpellcheckWordIterator iterator; |
| 305 EXPECT_TRUE(iterator.Initialize(&attributes, true)); | 299 EXPECT_TRUE(iterator.Initialize(&attributes, true)); |
| 306 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length())); | 300 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length())); |
| 307 | 301 |
| 308 base::string16 actual_word; | 302 base::string16 actual_word; |
| 309 int actual_start, actual_end; | 303 int actual_start, actual_len; |
| 310 SpellcheckWordIterator::WordIteratorStatus status; | 304 WordIteratorStatus status = GetNextNonSkippableWord( |
| 311 for (status = | 305 &iterator, &actual_word, &actual_start, &actual_len); |
| 312 iterator.GetNextWord(&actual_word, &actual_start, &actual_end); | |
| 313 status == SpellcheckWordIterator::IS_SKIPPABLE; | |
| 314 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) { | |
| 315 continue; | |
| 316 } | |
| 317 | 306 |
| 318 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status); | 307 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status); |
| 319 EXPECT_EQ(input_word, actual_word); | 308 EXPECT_EQ(expected_word, actual_word); |
| 320 EXPECT_EQ(0, actual_start); | 309 EXPECT_LE(0, actual_start); |
| 321 EXPECT_EQ(input_word.length(), | 310 EXPECT_EQ(expected_word.length(), |
| 322 static_cast<base::string16::size_type>(actual_end)); | 311 static_cast<base::string16::size_type>(actual_len)); |
| 323 } | 312 } |
| 324 } | 313 } |
| 325 | 314 |
| 326 TEST(SpellcheckWordIteratorTest, Initialization) { | 315 TEST(SpellcheckWordIteratorTest, Initialization) { |
| 327 // Test initialization works when a default language is set. | 316 // Test initialization works when a default language is set. |
| 328 { | 317 { |
| 329 SpellcheckCharAttribute attributes; | 318 SpellcheckCharAttribute attributes; |
| 330 attributes.SetDefaultLanguage("en-US"); | 319 attributes.SetDefaultLanguage("en-US"); |
| 331 | 320 |
| 332 SpellcheckWordIterator iterator; | 321 SpellcheckWordIterator iterator; |
| (...skipping 158 matching lines...) | |
| 491 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString()); | 480 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString()); |
| 492 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); | 481 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
| 493 EXPECT_TRUE(iter.Advance()); | 482 EXPECT_TRUE(iter.Advance()); |
| 494 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString()); | 483 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString()); |
| 495 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); | 484 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
| 496 EXPECT_TRUE(iter.Advance()); | 485 EXPECT_TRUE(iter.Advance()); |
| 497 EXPECT_EQ(base::UTF8ToUTF16(","), iter.GetString()); | 486 EXPECT_EQ(base::UTF8ToUTF16(","), iter.GetString()); |
| 498 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); | 487 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
| 499 EXPECT_FALSE(iter.Advance()); | 488 EXPECT_FALSE(iter.Advance()); |
| 500 } | 489 } |
| OLD | NEW |