| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "webkit/glue/webkit_glue.h" | |
| 6 | |
| 7 #include "base/file_util.h" | |
| 8 #include "base/message_loop.h" | |
| 9 #include "base/path_service.h" | |
| 10 #include "base/sys_string_conversions.h" | |
| 11 #include "chrome/browser/chrome_thread.h" | |
| 12 #include "chrome/browser/spellchecker.h" | |
| 13 #include "chrome/browser/spellchecker_platform_engine.h" | |
| 14 #include "chrome/common/chrome_paths.h" | |
| 15 #include "testing/gtest/include/gtest/gtest.h" | |
| 16 | |
| 17 namespace { | |
| 18 const FilePath::CharType kTempCustomDictionaryFile[] = | |
| 19 FILE_PATH_LITERAL("temp_custom_dictionary.txt"); | |
| 20 } // namespace | |
| 21 | |
| 22 class SpellCheckTest : public testing::Test { | |
| 23 public: | |
| 24 SpellCheckTest() | |
| 25 : file_thread_(ChromeThread::FILE, &message_loop_), | |
| 26 io_thread_(ChromeThread::IO, &message_loop_) {} | |
| 27 | |
| 28 protected: | |
| 29 MessageLoop message_loop_; | |
| 30 | |
| 31 private: | |
| 32 ChromeThread file_thread_; | |
| 33 ChromeThread io_thread_; // To keep DCHECKs inside spell checker happy. | |
| 34 }; | |
| 35 | |
// A special initialization function that exists only for the unit tests in
// this file. It is defined elsewhere; this is just its declaration.
void InitHunspellWithFiles(FILE* file_aff_hunspell, FILE* file_dic_hunspell);
| 40 | |
| 41 FilePath GetHunspellDirectory() { | |
| 42 FilePath hunspell_directory; | |
| 43 if (!PathService::Get(base::DIR_SOURCE_ROOT, &hunspell_directory)) | |
| 44 return FilePath(); | |
| 45 | |
| 46 hunspell_directory = hunspell_directory.AppendASCII("third_party"); | |
| 47 hunspell_directory = hunspell_directory.AppendASCII("hunspell"); | |
| 48 hunspell_directory = hunspell_directory.AppendASCII("dictionaries"); | |
| 49 return hunspell_directory; | |
| 50 } | |
| 51 | |
| 52 // Operates unit tests for the webkit_glue::SpellCheckWord() function | |
| 53 // with the US English dictionary. | |
| 54 // The unit tests in this function consist of: | |
| 55 // * Tests for the function with empty strings; | |
| 56 // * Tests for the function with a valid English word; | |
| 57 // * Tests for the function with a valid non-English word; | |
| 58 // * Tests for the function with a valid English word with a preceding | |
| 59 // space character; | |
| 60 // * Tests for the function with a valid English word with a preceding | |
| 61 // non-English word; | |
| 62 // * Tests for the function with a valid English word with a following | |
| 63 // space character; | |
| 64 // * Tests for the function with a valid English word with a following | |
| 65 // non-English word; | |
| 66 // * Tests for the function with two valid English words concatenated | |
| 67 // with space characters or non-English words; | |
| 68 // * Tests for the function with an invalid English word; | |
| 69 // * Tests for the function with an invalid English word with a preceding | |
| 70 // space character; | |
| 71 // * Tests for the function with an invalid English word with a preceding | |
| 72 // non-English word; | |
| 73 // * Tests for the function with2 an invalid English word with a following | |
| 74 // space character; | |
| 75 // * Tests for the function with an invalid English word with a following | |
| 76 // non-English word, and; | |
| 77 // * Tests for the function with two invalid English words concatenated | |
| 78 // with space characters or non-English words. | |
| 79 // A test with a "[ROBUSTNESS]" mark shows it is a robustness test and it uses | |
| 80 // grammartically incorrect string. | |
| 81 // TODO(hbono): Please feel free to add more tests. | |
| 82 TEST_F(SpellCheckTest, SpellCheckStrings_EN_US) { | |
| 83 static const struct { | |
| 84 // A string to be tested. | |
| 85 const wchar_t* input; | |
| 86 // An expected result for this test case. | |
| 87 // * true: the input string does not have any invalid words. | |
| 88 // * false: the input string has one or more invalid words. | |
| 89 bool expected_result; | |
| 90 // The position and the length of the first invalid word. | |
| 91 int misspelling_start; | |
| 92 int misspelling_length; | |
| 93 } kTestCases[] = { | |
| 94 // Empty strings. | |
| 95 {L"", true, 0, 0}, | |
| 96 {L" ", true, 0, 0}, | |
| 97 {L"\xA0", true, 0, 0}, | |
| 98 {L"\x3000", true, 0, 0}, | |
| 99 | |
| 100 // A valid English word "hello". | |
| 101 {L"hello", true, 0, 0}, | |
| 102 // A valid Chinese word (meaning "hello") consisiting of two CJKV | |
| 103 // ideographs | |
| 104 {L"\x4F60\x597D", true, 0, 0}, | |
| 105 // A valid Korean word (meaning "hello") consisting of five hangul | |
| 106 // syllables | |
| 107 {L"\xC548\xB155\xD558\xC138\xC694", true, 0, 0}, | |
| 108 // A valid Japanese word (meaning "hello") consisting of five Hiragana | |
| 109 // letters | |
| 110 {L"\x3053\x3093\x306B\x3061\x306F", true, 0, 0}, | |
| 111 // A valid Hindi word (meaning ?) consisting of six Devanagari letters | |
| 112 // (This word is copied from "http://b/issue?id=857583".) | |
| 113 {L"\x0930\x093E\x091C\x0927\x093E\x0928", true, 0, 0}, | |
| 114 // A valid English word "affix" using a Latin ligature 'ffi' | |
| 115 {L"a\xFB03x", true, 0, 0}, | |
| 116 // A valid English word "hello" (fullwidth version) | |
| 117 {L"\xFF28\xFF45\xFF4C\xFF4C\xFF4F", true, 0, 0}, | |
| 118 // Two valid Greek words (meaning "hello") consisting of seven Greek | |
| 119 // letters | |
| 120 {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true, 0, 0}, | |
| 121 // A valid Russian word (meainng "hello") consisting of twelve Cyrillic | |
| 122 // letters | |
| 123 {L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
| 124 L"\x0442\x0432\x0443\x0439\x0442\x0435", true, 0, 0}, | |
| 125 // A valid English contraction | |
| 126 {L"isn't", true, 0, 0}, | |
| 127 // A valid English word enclosed with underscores. | |
| 128 {L"_hello_", true, 0, 0}, | |
| 129 | |
| 130 // A valid English word with a preceding whitespace | |
| 131 {L" " L"hello", true, 0, 0}, | |
| 132 // A valid English word with a preceding no-break space | |
| 133 {L"\xA0" L"hello", true, 0, 0}, | |
| 134 // A valid English word with a preceding ideographic space | |
| 135 {L"\x3000" L"hello", true, 0, 0}, | |
| 136 // A valid English word with a preceding Chinese word | |
| 137 {L"\x4F60\x597D" L"hello", true, 0, 0}, | |
| 138 // [ROBUSTNESS] A valid English word with a preceding Korean word | |
| 139 {L"\xC548\xB155\xD558\xC138\xC694" L"hello", true, 0, 0}, | |
| 140 // A valid English word with a preceding Japanese word | |
| 141 {L"\x3053\x3093\x306B\x3061\x306F" L"hello", true, 0, 0}, | |
| 142 // [ROBUSTNESS] A valid English word with a preceding Hindi word | |
| 143 {L"\x0930\x093E\x091C\x0927\x093E\x0928" L"hello", true, 0, 0}, | |
| 144 // [ROBUSTNESS] A valid English word with two preceding Greek words | |
| 145 {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5" | |
| 146 L"hello", true, 0, 0}, | |
| 147 // [ROBUSTNESS] A valid English word with a preceding Russian word | |
| 148 {L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
| 149 L"\x0442\x0432\x0443\x0439\x0442\x0435" L"hello", true, 0, 0}, | |
| 150 | |
| 151 // A valid English word with a following whitespace | |
| 152 {L"hello" L" ", true, 0, 0}, | |
| 153 // A valid English word with a following no-break space | |
| 154 {L"hello" L"\xA0", true, 0, 0}, | |
| 155 // A valid English word with a following ideographic space | |
| 156 {L"hello" L"\x3000", true, 0, 0}, | |
| 157 // A valid English word with a following Chinese word | |
| 158 {L"hello" L"\x4F60\x597D", true, 0, 0}, | |
| 159 // [ROBUSTNESS] A valid English word with a following Korean word | |
| 160 {L"hello" L"\xC548\xB155\xD558\xC138\xC694", true, 0, 0}, | |
| 161 // A valid English word with a following Japanese word | |
| 162 {L"hello" L"\x3053\x3093\x306B\x3061\x306F", true, 0, 0}, | |
| 163 // [ROBUSTNESS] A valid English word with a following Hindi word | |
| 164 {L"hello" L"\x0930\x093E\x091C\x0927\x093E\x0928", true, 0, 0}, | |
| 165 // [ROBUSTNESS] A valid English word with two following Greek words | |
| 166 {L"hello" | |
| 167 L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true, 0, 0}, | |
| 168 // [ROBUSTNESS] A valid English word with a following Russian word | |
| 169 {L"hello" L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
| 170 L"\x0442\x0432\x0443\x0439\x0442\x0435", true, 0, 0}, | |
| 171 | |
| 172 // Two valid English words concatenated with a whitespace | |
| 173 {L"hello" L" " L"hello", true, 0, 0}, | |
| 174 // Two valid English words concatenated with a no-break space | |
| 175 {L"hello" L"\xA0" L"hello", true, 0, 0}, | |
| 176 // Two valid English words concatenated with an ideographic space | |
| 177 {L"hello" L"\x3000" L"hello", true, 0, 0}, | |
| 178 // Two valid English words concatenated with a Chinese word | |
| 179 {L"hello" L"\x4F60\x597D" L"hello", true, 0, 0}, | |
| 180 // [ROBUSTNESS] Two valid English words concatenated with a Korean word | |
| 181 {L"hello" L"\xC548\xB155\xD558\xC138\xC694" L"hello", true, 0, 0}, | |
| 182 // Two valid English words concatenated with a Japanese word | |
| 183 {L"hello" L"\x3053\x3093\x306B\x3061\x306F" L"hello", true, 0, 0}, | |
| 184 // [ROBUSTNESS] Two valid English words concatenated with a Hindi word | |
| 185 {L"hello" L"\x0930\x093E\x091C\x0927\x093E\x0928" L"hello" , true, 0, 0}, | |
| 186 // [ROBUSTNESS] Two valid English words concatenated with two Greek words | |
| 187 {L"hello" L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5" | |
| 188 L"hello", true, 0, 0}, | |
| 189 // [ROBUSTNESS] Two valid English words concatenated with a Russian word | |
| 190 {L"hello" L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
| 191 L"\x0442\x0432\x0443\x0439\x0442\x0435" L"hello", true, 0, 0}, | |
| 192 // [ROBUSTNESS] Two valid English words concatenated with a contraction | |
| 193 // character. | |
| 194 {L"hello:hello", true, 0, 0}, | |
| 195 | |
| 196 // An invalid English word | |
| 197 {L"ifmmp", false, 0, 5}, | |
| 198 // An invalid English word "bffly" containing a Latin ligature 'ffl' | |
| 199 {L"b\xFB04y", false, 0, 3}, | |
| 200 // An invalid English word "ifmmp" (fullwidth version) | |
| 201 {L"\xFF29\xFF46\xFF4D\xFF4D\xFF50", false, 0, 5}, | |
| 202 // An invalid English contraction | |
| 203 {L"jtm'u", false, 0, 5}, | |
| 204 // An invalid English word enclosed with underscores. | |
| 205 {L"_ifmmp_", false, 1, 5}, | |
| 206 | |
| 207 // An invalid English word with a preceding whitespace | |
| 208 {L" " L"ifmmp", false, 1, 5}, | |
| 209 // An invalid English word with a preceding no-break space | |
| 210 {L"\xA0" L"ifmmp", false, 1, 5}, | |
| 211 // An invalid English word with a preceding ideographic space | |
| 212 {L"\x3000" L"ifmmp", false, 1, 5}, | |
| 213 // An invalid English word with a preceding Chinese word | |
| 214 {L"\x4F60\x597D" L"ifmmp", false, 2, 5}, | |
| 215 // [ROBUSTNESS] An invalid English word with a preceding Korean word | |
| 216 {L"\xC548\xB155\xD558\xC138\xC694" L"ifmmp", false, 5, 5}, | |
| 217 // An invalid English word with a preceding Japanese word | |
| 218 {L"\x3053\x3093\x306B\x3061\x306F" L"ifmmp", false, 5, 5}, | |
| 219 // [ROBUSTNESS] An invalid English word with a preceding Hindi word | |
| 220 {L"\x0930\x093E\x091C\x0927\x093E\x0928" L"ifmmp", false, 6, 5}, | |
| 221 // [ROBUSTNESS] An invalid English word with two preceding Greek words | |
| 222 {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5" | |
| 223 L"ifmmp", false, 8, 5}, | |
| 224 // [ROBUSTNESS] An invalid English word with a preceding Russian word | |
| 225 {L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
| 226 L"\x0442\x0432\x0443\x0439\x0442\x0435" L"ifmmp", false, 12, 5}, | |
| 227 | |
| 228 // An invalid English word with a following whitespace | |
| 229 {L"ifmmp" L" ", false, 0, 5}, | |
| 230 // An invalid English word with a following no-break space | |
| 231 {L"ifmmp" L"\xA0", false, 0, 5}, | |
| 232 // An invalid English word with a following ideographic space | |
| 233 {L"ifmmp" L"\x3000", false, 0, 5}, | |
| 234 // An invalid English word with a following Chinese word | |
| 235 {L"ifmmp" L"\x4F60\x597D", false, 0, 5}, | |
| 236 // [ROBUSTNESS] An invalid English word with a following Korean word | |
| 237 {L"ifmmp" L"\xC548\xB155\xD558\xC138\xC694", false, 0, 5}, | |
| 238 // An invalid English word with a following Japanese word | |
| 239 {L"ifmmp" L"\x3053\x3093\x306B\x3061\x306F", false, 0, 5}, | |
| 240 // [ROBUSTNESS] An invalid English word with a following Hindi word | |
| 241 {L"ifmmp" L"\x0930\x093E\x091C\x0927\x093E\x0928", false, 0, 5}, | |
| 242 // [ROBUSTNESS] An invalid English word with two following Greek words | |
| 243 {L"ifmmp" | |
| 244 L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", false, 0, 5}, | |
| 245 // [ROBUSTNESS] An invalid English word with a following Russian word | |
| 246 {L"ifmmp" L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
| 247 L"\x0442\x0432\x0443\x0439\x0442\x0435", false, 0, 5}, | |
| 248 | |
| 249 // Two invalid English words concatenated with a whitespace | |
| 250 {L"ifmmp" L" " L"ifmmp", false, 0, 5}, | |
| 251 // Two invalid English words concatenated with a no-break space | |
| 252 {L"ifmmp" L"\xA0" L"ifmmp", false, 0, 5}, | |
| 253 // Two invalid English words concatenated with an ideographic space | |
| 254 {L"ifmmp" L"\x3000" L"ifmmp", false, 0, 5}, | |
| 255 // Two invalid English words concatenated with a Chinese word | |
| 256 {L"ifmmp" L"\x4F60\x597D" L"ifmmp", false, 0, 5}, | |
| 257 // [ROBUSTNESS] Two invalid English words concatenated with a Korean word | |
| 258 {L"ifmmp" L"\xC548\xB155\xD558\xC138\xC694" L"ifmmp", false, 0, 5}, | |
| 259 // Two invalid English words concatenated with a Japanese word | |
| 260 {L"ifmmp" L"\x3053\x3093\x306B\x3061\x306F" L"ifmmp", false, 0, 5}, | |
| 261 // [ROBUSTNESS] Two invalid English words concatenated with a Hindi word | |
| 262 {L"ifmmp" L"\x0930\x093E\x091C\x0927\x093E\x0928" L"ifmmp" , false, 0, 5}, | |
| 263 // [ROBUSTNESS] Two invalid English words concatenated with two Greek words | |
| 264 {L"ifmmp" L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5" | |
| 265 L"ifmmp", false, 0, 5}, | |
| 266 // [ROBUSTNESS] Two invalid English words concatenated with a Russian word | |
| 267 {L"ifmmp" L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
| 268 L"\x0442\x0432\x0443\x0439\x0442\x0435" L"ifmmp", false, 0, 5}, | |
| 269 // [ROBUSTNESS] Two invalid English words concatenated with a contraction | |
| 270 // character. | |
| 271 {L"ifmmp:ifmmp", false, 0, 11}, | |
| 272 | |
| 273 // [REGRESSION] Issue 13432: "Any word of 13 or 14 characters is not | |
| 274 // spellcheck" <http://crbug.com/13432>. | |
| 275 {L"qwertyuiopasd", false, 0, 13}, | |
| 276 {L"qwertyuiopasdf", false, 0, 14}, | |
| 277 }; | |
| 278 | |
| 279 FilePath hunspell_directory = GetHunspellDirectory(); | |
| 280 ASSERT_FALSE(hunspell_directory.empty()); | |
| 281 | |
| 282 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
| 283 hunspell_directory, "en-US", NULL, FilePath())); | |
| 284 spell_checker->Initialize(); | |
| 285 message_loop_.RunAllPending(); | |
| 286 | |
| 287 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
| 288 size_t input_length = 0; | |
| 289 if (kTestCases[i].input != NULL) { | |
| 290 input_length = wcslen(kTestCases[i].input); | |
| 291 } | |
| 292 int misspelling_start; | |
| 293 int misspelling_length; | |
| 294 bool result = spell_checker->SpellCheckWord( | |
| 295 WideToUTF16(kTestCases[i].input).c_str(), | |
| 296 static_cast<int>(input_length), | |
| 297 0, | |
| 298 &misspelling_start, | |
| 299 &misspelling_length, NULL); | |
| 300 | |
| 301 EXPECT_EQ(kTestCases[i].expected_result, result); | |
| 302 EXPECT_EQ(kTestCases[i].misspelling_start, misspelling_start); | |
| 303 EXPECT_EQ(kTestCases[i].misspelling_length, misspelling_length); | |
| 304 } | |
| 305 } | |
| 306 | |
| 307 TEST_F(SpellCheckTest, SpellCheckSuggestions_EN_US) { | |
| 308 static const struct { | |
| 309 // A string to be tested. | |
| 310 const wchar_t* input; | |
| 311 // An expected result for this test case. | |
| 312 // * true: the input string does not have any invalid words. | |
| 313 // * false: the input string has one or more invalid words. | |
| 314 bool expected_result; | |
| 315 // The position and the length of the first invalid word. | |
| 316 int misspelling_start; | |
| 317 int misspelling_length; | |
| 318 | |
| 319 // A suggested word that should occur. | |
| 320 const wchar_t* suggested_word; | |
| 321 } kTestCases[] = { // A valid English word with a preceding whitespace | |
| 322 // We need to have separate test cases here, since hunspell and the OS X | |
| 323 // spellchecking service occasionally differ on what they consider a valid | |
| 324 // suggestion for a given word, although these lists could likely be | |
| 325 // integrated somewhat. | |
| 326 #if defined(OS_MACOSX) | |
| 327 // These words come from the wikipedia page of the most commonly | |
| 328 // misspelled words in english. | |
| 329 // (http://en.wikipedia.org/wiki/Commonly_misspelled_words). | |
| 330 {L"absense", false, 0, 0, L"absence"}, | |
| 331 {L"acceptible", false, 0, 0, L"acceptable"}, | |
| 332 {L"accidentaly", false, 0, 0, L"accidentally"}, | |
| 333 {L"accomodate", false, 0, 0, L"accommodate"}, | |
| 334 {L"acheive", false, 0, 0, L"achieve"}, | |
| 335 {L"acknowlege", false, 0, 0, L"acknowledge"}, | |
| 336 {L"acquaintence", false, 0, 0, L"acquaintance"}, | |
| 337 {L"aquire", false, 0, 0, L"acquire"}, | |
| 338 {L"aquit", false, 0, 0, L"acquit"}, | |
| 339 {L"acrage", false, 0, 0, L"acreage"}, | |
| 340 {L"adress", false, 0, 0, L"address"}, | |
| 341 {L"adultary", false, 0, 0, L"adultery"}, | |
| 342 {L"advertize", false, 0, 0, L"advertise"}, | |
| 343 {L"adviseable", false, 0, 0, L"advisable"}, | |
| 344 {L"agression", false, 0, 0, L"aggression"}, | |
| 345 {L"alchohol", false, 0, 0, L"alcohol"}, | |
| 346 {L"alege", false, 0, 0, L"allege"}, | |
| 347 {L"allegaince", false, 0, 0, L"allegiance"}, | |
| 348 {L"allmost", false, 0, 0, L"almost"}, | |
| 349 // Ideally, this test should pass. It works in firefox, but not in hunspell | |
| 350 // or OS X. | |
| 351 // {L"alot", false, 0, 0, L"a lot"}, | |
| 352 {L"amatuer", false, 0, 0, L"amateur"}, | |
| 353 {L"ammend", false, 0, 0, L"amend"}, | |
| 354 {L"amung", false, 0, 0, L"among"}, | |
| 355 {L"anually", false, 0, 0, L"annually"}, | |
| 356 {L"apparant", false, 0, 0, L"apparent"}, | |
| 357 {L"artic", false, 0, 0, L"arctic"}, | |
| 358 {L"arguement", false, 0, 0, L"argument"}, | |
| 359 {L"athiest", false, 0, 0, L"atheist"}, | |
| 360 {L"athelete", false, 0, 0, L"athlete"}, | |
| 361 {L"avrage", false, 0, 0, L"average"}, | |
| 362 {L"awfull", false, 0, 0, L"awful"}, | |
| 363 {L"ballance", false, 0, 0, L"balance"}, | |
| 364 {L"basicly", false, 0, 0, L"basically"}, | |
| 365 {L"becuase", false, 0, 0, L"because"}, | |
| 366 {L"becomeing", false, 0, 0, L"becoming"}, | |
| 367 {L"befor", false, 0, 0, L"before"}, | |
| 368 {L"begining", false, 0, 0, L"beginning"}, | |
| 369 {L"beleive", false, 0, 0, L"believe"}, | |
| 370 {L"bellweather", false, 0, 0, L"bellwether"}, | |
| 371 {L"benifit", false, 0, 0, L"benefit"}, | |
| 372 {L"bouy", false, 0, 0, L"buoy"}, | |
| 373 {L"briliant", false, 0, 0, L"brilliant"}, | |
| 374 {L"burgler", false, 0, 0, L"burglar"}, | |
| 375 {L"camoflage", false, 0, 0, L"camouflage"}, | |
| 376 {L"carrer", false, 0, 0, L"career"}, | |
| 377 {L"carefull", false, 0, 0, L"careful"}, | |
| 378 {L"Carribean", false, 0, 0, L"Caribbean"}, | |
| 379 {L"catagory", false, 0, 0, L"category"}, | |
| 380 {L"cauhgt", false, 0, 0, L"caught"}, | |
| 381 {L"cieling", false, 0, 0, L"ceiling"}, | |
| 382 {L"cemetary", false, 0, 0, L"cemetery"}, | |
| 383 {L"certin", false, 0, 0, L"certain"}, | |
| 384 {L"changable", false, 0, 0, L"changeable"}, | |
| 385 {L"cheif", false, 0, 0, L"chief"}, | |
| 386 {L"citezen", false, 0, 0, L"citizen"}, | |
| 387 {L"collaegue", false, 0, 0, L"colleague"}, | |
| 388 {L"colum", false, 0, 0, L"column"}, | |
| 389 {L"comming", false, 0, 0, L"coming"}, | |
| 390 {L"commited", false, 0, 0, L"committed"}, | |
| 391 {L"compitition", false, 0, 0, L"competition"}, | |
| 392 {L"conceed", false, 0, 0, L"concede"}, | |
| 393 {L"congradulate", false, 0, 0, L"congratulate"}, | |
| 394 {L"consciencious", false, 0, 0, L"conscientious"}, | |
| 395 {L"concious", false, 0, 0, L"conscious"}, | |
| 396 {L"concensus", false, 0, 0, L"consensus"}, | |
| 397 {L"contraversy", false, 0, 0, L"controversy"}, | |
| 398 {L"conveniance", false, 0, 0, L"convenience"}, | |
| 399 {L"critecize", false, 0, 0, L"criticize"}, | |
| 400 {L"dacquiri", false, 0, 0, L"daiquiri"}, | |
| 401 {L"decieve", false, 0, 0, L"deceive"}, | |
| 402 {L"dicide", false, 0, 0, L"decide"}, | |
| 403 {L"definate", false, 0, 0, L"definite"}, | |
| 404 {L"definitly", false, 0, 0, L"definitely"}, | |
| 405 {L"deposite", false, 0, 0, L"deposit"}, | |
| 406 {L"desparate", false, 0, 0, L"desperate"}, | |
| 407 {L"develope", false, 0, 0, L"develop"}, | |
| 408 {L"diffrence", false, 0, 0, L"difference"}, | |
| 409 {L"dilema", false, 0, 0, L"dilemma"}, | |
| 410 {L"disapear", false, 0, 0, L"disappear"}, | |
| 411 {L"disapoint", false, 0, 0, L"disappoint"}, | |
| 412 {L"disasterous", false, 0, 0, L"disastrous"}, | |
| 413 {L"disipline", false, 0, 0, L"discipline"}, | |
| 414 {L"drunkeness", false, 0, 0, L"drunkenness"}, | |
| 415 {L"dumbell", false, 0, 0, L"dumbbell"}, | |
| 416 {L"durring", false, 0, 0, L"during"}, | |
| 417 {L"easely", false, 0, 0, L"easily"}, | |
| 418 {L"eigth", false, 0, 0, L"eight"}, | |
| 419 {L"embarass", false, 0, 0, L"embarrass"}, | |
| 420 {L"enviroment", false, 0, 0, L"environment"}, | |
| 421 {L"equiped", false, 0, 0, L"equipped"}, | |
| 422 {L"equiptment", false, 0, 0, L"equipment"}, | |
| 423 {L"exagerate", false, 0, 0, L"exaggerate"}, | |
| 424 {L"excede", false, 0, 0, L"exceed"}, | |
| 425 {L"exellent", false, 0, 0, L"excellent"}, | |
| 426 {L"exsept", false, 0, 0, L"except"}, | |
| 427 {L"exercize", false, 0, 0, L"exercise"}, | |
| 428 {L"exilerate", false, 0, 0, L"exhilarate"}, | |
| 429 {L"existance", false, 0, 0, L"existence"}, | |
| 430 {L"experiance", false, 0, 0, L"experience"}, | |
| 431 {L"experament", false, 0, 0, L"experiment"}, | |
| 432 {L"explaination", false, 0, 0, L"explanation"}, | |
| 433 {L"extreem", false, 0, 0, L"extreme"}, | |
| 434 {L"familier", false, 0, 0, L"familiar"}, | |
| 435 {L"facinating", false, 0, 0, L"fascinating"}, | |
| 436 {L"firey", false, 0, 0, L"fiery"}, | |
| 437 {L"finaly", false, 0, 0, L"finally"}, | |
| 438 {L"flourescent", false, 0, 0, L"fluorescent"}, | |
| 439 {L"foriegn", false, 0, 0, L"foreign"}, | |
| 440 {L"fourty", false, 0, 0, L"forty"}, | |
| 441 {L"foreward", false, 0, 0, L"forward"}, | |
| 442 {L"freind", false, 0, 0, L"friend"}, | |
| 443 {L"fullfil", false, 0, 0, L"fulfill"}, | |
| 444 {L"fundemental", false, 0, 0, L"fundamental"}, | |
| 445 {L"guage", false, 0, 0, L"gauge"}, | |
| 446 {L"generaly", false, 0, 0, L"generally"}, | |
| 447 {L"goverment", false, 0, 0, L"government"}, | |
| 448 {L"grammer", false, 0, 0, L"grammar"}, | |
| 449 {L"gratefull", false, 0, 0, L"grateful"}, | |
| 450 {L"garantee", false, 0, 0, L"guarantee"}, | |
| 451 {L"guidence", false, 0, 0, L"guidance"}, | |
| 452 {L"happyness", false, 0, 0, L"happiness"}, | |
| 453 {L"harrass", false, 0, 0, L"harass"}, | |
| 454 {L"heighth", false, 0, 0, L"height"}, | |
| 455 {L"heirarchy", false, 0, 0, L"hierarchy"}, | |
| 456 {L"humerous", false, 0, 0, L"humorous"}, | |
| 457 {L"hygene", false, 0, 0, L"hygiene"}, | |
| 458 {L"hipocrit", false, 0, 0, L"hypocrite"}, | |
| 459 {L"idenity", false, 0, 0, L"identity"}, | |
| 460 {L"ignorence", false, 0, 0, L"ignorance"}, | |
| 461 {L"imaginery", false, 0, 0, L"imaginary"}, | |
| 462 {L"immitate", false, 0, 0, L"imitate"}, | |
| 463 {L"immitation", false, 0, 0, L"imitation"}, | |
| 464 {L"imediately", false, 0, 0, L"immediately"}, | |
| 465 {L"incidently", false, 0, 0, L"incidentally"}, | |
| 466 {L"independant", false, 0, 0, L"independent"}, | |
| 467 {L"indispensible", false, 0, 0, L"indispensable"}, | |
| 468 {L"innoculate", false, 0, 0, L"inoculate"}, | |
| 469 {L"inteligence", false, 0, 0, L"intelligence"}, | |
| 470 {L"intresting", false, 0, 0, L"interesting"}, | |
| 471 {L"interuption", false, 0, 0, L"interruption"}, | |
| 472 {L"irrelevent", false, 0, 0, L"irrelevant"}, | |
| 473 {L"irritible", false, 0, 0, L"irritable"}, | |
| 474 {L"iland", false, 0, 0, L"island"}, | |
| 475 {L"jellous", false, 0, 0, L"jealous"}, | |
| 476 {L"knowlege", false, 0, 0, L"knowledge"}, | |
| 477 {L"labratory", false, 0, 0, L"laboratory"}, | |
| 478 {L"liesure", false, 0, 0, L"leisure"}, | |
| 479 {L"lenght", false, 0, 0, L"length"}, | |
| 480 {L"liason", false, 0, 0, L"liaison"}, | |
| 481 {L"libary", false, 0, 0, L"library"}, | |
| 482 {L"lisence", false, 0, 0, L"license"}, | |
| 483 {L"lonelyness", false, 0, 0, L"loneliness"}, | |
| 484 {L"lieing", false, 0, 0, L"lying"}, | |
| 485 {L"maintenence", false, 0, 0, L"maintenance"}, | |
| 486 {L"manuever", false, 0, 0, L"maneuver"}, | |
| 487 {L"marrige", false, 0, 0, L"marriage"}, | |
| 488 {L"mathmatics", false, 0, 0, L"mathematics"}, | |
| 489 {L"medcine", false, 0, 0, L"medicine"}, | |
| 490 {L"medeval", false, 0, 0, L"medieval"}, | |
| 491 {L"momento", false, 0, 0, L"memento"}, | |
| 492 {L"millenium", false, 0, 0, L"millennium"}, | |
| 493 {L"miniture", false, 0, 0, L"miniature"}, | |
| 494 {L"minite", false, 0, 0, L"minute"}, | |
| 495 {L"mischevous", false, 0, 0, L"mischievous"}, | |
| 496 {L"mispell", false, 0, 0, L"misspell"}, | |
| 497 // Maybe this one should pass, as it works in hunspell, but not in firefox. | |
| 498 // {L"misterius", false, 0, 0, L"mysterious"}, | |
| 499 {L"naturaly", false, 0, 0, L"naturally"}, | |
| 500 {L"neccessary", false, 0, 0, L"necessary"}, | |
| 501 {L"neice", false, 0, 0, L"niece"}, | |
| 502 {L"nieghbor", false, 0, 0, L"neighbor"}, | |
| 503 {L"nieghbour", false, 0, 0, L"neighbor"}, | |
| 504 {L"niether", false, 0, 0, L"neither"}, | |
| 505 {L"noticable", false, 0, 0, L"noticeable"}, | |
| 506 {L"occassion", false, 0, 0, L"occasion"}, | |
| 507 {L"occasionaly", false, 0, 0, L"occasionally"}, | |
| 508 {L"occurrance", false, 0, 0, L"occurrence"}, | |
| 509 {L"occured", false, 0, 0, L"occurred"}, | |
| 510 {L"oficial", false, 0, 0, L"official"}, | |
| 511 {L"offen", false, 0, 0, L"often"}, | |
| 512 {L"ommision", false, 0, 0, L"omission"}, | |
| 513 {L"oprate", false, 0, 0, L"operate"}, | |
| 514 {L"oppurtunity", false, 0, 0, L"opportunity"}, | |
| 515 {L"orignal", false, 0, 0, L"original"}, | |
| 516 {L"outragous", false, 0, 0, L"outrageous"}, | |
| 517 {L"parrallel", false, 0, 0, L"parallel"}, | |
| 518 {L"parliment", false, 0, 0, L"parliament"}, | |
| 519 {L"particurly", false, 0, 0, L"particularly"}, | |
| 520 {L"passtime", false, 0, 0, L"pastime"}, | |
| 521 {L"peculier", false, 0, 0, L"peculiar"}, | |
| 522 {L"percieve", false, 0, 0, L"perceive"}, | |
| 523 {L"pernament", false, 0, 0, L"permanent"}, | |
| 524 {L"perseverence", false, 0, 0, L"perseverance"}, | |
| 525 {L"personaly", false, 0, 0, L"personally"}, | |
| 526 {L"personell", false, 0, 0, L"personnel"}, | |
| 527 {L"persaude", false, 0, 0, L"persuade"}, | |
| 528 {L"pichure", false, 0, 0, L"picture"}, | |
| 529 {L"peice", false, 0, 0, L"piece"}, | |
| 530 {L"plagerize", false, 0, 0, L"plagiarize"}, | |
| 531 {L"playright", false, 0, 0, L"playwright"}, | |
| 532 {L"plesant", false, 0, 0, L"pleasant"}, | |
| 533 {L"pollitical", false, 0, 0, L"political"}, | |
| 534 {L"posession", false, 0, 0, L"possession"}, | |
| 535 {L"potatos", false, 0, 0, L"potatoes"}, | |
| 536 {L"practicle", false, 0, 0, L"practical"}, | |
| 537 {L"preceed", false, 0, 0, L"precede"}, | |
| 538 {L"predjudice", false, 0, 0, L"prejudice"}, | |
| 539 {L"presance", false, 0, 0, L"presence"}, | |
| 540 {L"privelege", false, 0, 0, L"privilege"}, | |
| 541 // This one should probably work. It does in FF and Hunspell. | |
| 542 // {L"probly", false, 0, 0, L"probably"}, | |
| 543 {L"proffesional", false, 0, 0, L"professional"}, | |
| 544 {L"professer", false, 0, 0, L"professor"}, | |
| 545 {L"promiss", false, 0, 0, L"promise"}, | |
| 546 {L"pronounciation", false, 0, 0, L"pronunciation"}, | |
| 547 {L"prufe", false, 0, 0, L"proof"}, | |
| 548 {L"psycology", false, 0, 0, L"psychology"}, | |
| 549 {L"publically", false, 0, 0, L"publicly"}, | |
| 550 {L"quanity", false, 0, 0, L"quantity"}, | |
| 551 {L"quarentine", false, 0, 0, L"quarantine"}, | |
| 552 {L"questionaire", false, 0, 0, L"questionnaire"}, | |
| 553 {L"readible", false, 0, 0, L"readable"}, | |
| 554 {L"realy", false, 0, 0, L"really"}, | |
| 555 {L"recieve", false, 0, 0, L"receive"}, | |
| 556 {L"reciept", false, 0, 0, L"receipt"}, | |
| 557 {L"reconize", false, 0, 0, L"recognize"}, | |
| 558 {L"recomend", false, 0, 0, L"recommend"}, | |
| 559 {L"refered", false, 0, 0, L"referred"}, | |
| 560 {L"referance", false, 0, 0, L"reference"}, | |
| 561 {L"relevent", false, 0, 0, L"relevant"}, | |
| 562 {L"religous", false, 0, 0, L"religious"}, | |
| 563 {L"repitition", false, 0, 0, L"repetition"}, | |
| 564 {L"restarant", false, 0, 0, L"restaurant"}, | |
| 565 {L"rythm", false, 0, 0, L"rhythm"}, | |
| 566 {L"rediculous", false, 0, 0, L"ridiculous"}, | |
| 567 {L"sacrefice", false, 0, 0, L"sacrifice"}, | |
| 568 {L"saftey", false, 0, 0, L"safety"}, | |
| 569 {L"sissors", false, 0, 0, L"scissors"}, | |
| 570 {L"secratary", false, 0, 0, L"secretary"}, | |
| 571 {L"sieze", false, 0, 0, L"seize"}, | |
| 572 {L"seperate", false, 0, 0, L"separate"}, | |
| 573 {L"sargent", false, 0, 0, L"sergeant"}, | |
| 574 {L"shineing", false, 0, 0, L"shining"}, | |
| 575 {L"similer", false, 0, 0, L"similar"}, | |
| 576 {L"sinceerly", false, 0, 0, L"sincerely"}, | |
| 577 {L"speach", false, 0, 0, L"speech"}, | |
| 578 {L"stoping", false, 0, 0, L"stopping"}, | |
| 579 {L"strenght", false, 0, 0, L"strength"}, | |
| 580 {L"succede", false, 0, 0, L"succeed"}, | |
| 581 {L"succesful", false, 0, 0, L"successful"}, | |
| 582 {L"supercede", false, 0, 0, L"supersede"}, | |
| 583 {L"surelly", false, 0, 0, L"surely"}, | |
| 584 {L"suprise", false, 0, 0, L"surprise"}, | |
| 585 {L"temperture", false, 0, 0, L"temperature"}, | |
| 586 {L"temprary", false, 0, 0, L"temporary"}, | |
| 587 {L"tomatos", false, 0, 0, L"tomatoes"}, | |
| 588 {L"tommorrow", false, 0, 0, L"tomorrow"}, | |
| 589 {L"tounge", false, 0, 0, L"tongue"}, | |
| 590 {L"truely", false, 0, 0, L"truly"}, | |
| 591 {L"twelth", false, 0, 0, L"twelfth"}, | |
| 592 {L"tyrany", false, 0, 0, L"tyranny"}, | |
| 593 {L"underate", false, 0, 0, L"underrate"}, | |
| 594 {L"untill", false, 0, 0, L"until"}, | |
| 595 {L"unuseual", false, 0, 0, L"unusual"}, | |
| 596 {L"upholstry", false, 0, 0, L"upholstery"}, | |
| 597 {L"usible", false, 0, 0, L"usable"}, | |
| 598 {L"useing", false, 0, 0, L"using"}, | |
| 599 {L"usualy", false, 0, 0, L"usually"}, | |
| 600 {L"vaccuum", false, 0, 0, L"vacuum"}, | |
| 601 {L"vegatarian", false, 0, 0, L"vegetarian"}, | |
| 602 {L"vehical", false, 0, 0, L"vehicle"}, | |
| 603 {L"visious", false, 0, 0, L"vicious"}, | |
| 604 {L"villege", false, 0, 0, L"village"}, | |
| 605 {L"wierd", false, 0, 0, L"weird"}, | |
| 606 {L"wellcome", false, 0, 0, L"welcome"}, | |
| 607 {L"wellfare", false, 0, 0, L"welfare"}, | |
| 608 {L"wilfull", false, 0, 0, L"willful"}, | |
| 609 {L"withold", false, 0, 0, L"withhold"}, | |
| 610 {L"writting", false, 0, 0, L"writing"}, | |
| 611 #else | |
| 612 {L"ello", false, 0, 0, L"hello"}, | |
| 613 {L"ello", false, 0, 0, L"cello"}, | |
| 614 {L"wate", false, 0, 0, L"water"}, | |
| 615 {L"wate", false, 0, 0, L"waste"}, | |
| 616 {L"wate", false, 0, 0, L"sate"}, | |
| 617 {L"wate", false, 0, 0, L"ate"}, | |
| 618 {L"jum", false, 0, 0, L"jump"}, | |
| 619 {L"jum", false, 0, 0, L"hum"}, | |
| 620 {L"jum", false, 0, 0, L"sum"}, | |
| 621 {L"jum", false, 0, 0, L"um"}, | |
| 622 #endif // !OS_MACOSX | |
| 623 // TODO (Sidchat): add many more examples. | |
| 624 }; | |
| 625 | |
| 626 FilePath hunspell_directory = GetHunspellDirectory(); | |
| 627 ASSERT_FALSE(hunspell_directory.empty()); | |
| 628 | |
| 629 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
| 630 hunspell_directory, "en-US", NULL, FilePath())); | |
| 631 spell_checker->Initialize(); | |
| 632 message_loop_.RunAllPending(); | |
| 633 | |
| 634 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
| 635 std::vector<string16> suggestions; | |
| 636 size_t input_length = 0; | |
| 637 if (kTestCases[i].input != NULL) { | |
| 638 input_length = wcslen(kTestCases[i].input); | |
| 639 } | |
| 640 int misspelling_start; | |
| 641 int misspelling_length; | |
| 642 bool result = spell_checker->SpellCheckWord( | |
| 643 WideToUTF16(kTestCases[i].input).c_str(), | |
| 644 static_cast<int>(input_length), | |
| 645 0, | |
| 646 &misspelling_start, | |
| 647 &misspelling_length, | |
| 648 &suggestions); | |
| 649 | |
| 650 // Check for spelling. | |
| 651 EXPECT_EQ(kTestCases[i].expected_result, result); | |
| 652 | |
| 653 // Check if the suggested words occur. | |
| 654 bool suggested_word_is_present = false; | |
| 655 for (int j=0; j < static_cast<int>(suggestions.size()); j++) { | |
| 656 if (suggestions.at(j).compare(WideToUTF16(kTestCases[i].suggested_word)) | |
| 657 == 0) { | |
| 658 suggested_word_is_present = true; | |
| 659 break; | |
| 660 } | |
| 661 } | |
| 662 | |
| 663 EXPECT_TRUE(suggested_word_is_present); | |
| 664 } | |
| 665 } | |
| 666 | |
| 667 // This test verifies our spellchecker can split a text into words and check | |
| 668 // the spelling of each word in the text. | |
| 669 TEST_F(SpellCheckTest, SpellCheckText) { | |
| 670 static const struct { | |
| 671 const char* language; | |
| 672 const wchar_t* input; | |
| 673 } kTestCases[] = { | |
| 674 { | |
| 675 // Catalan | |
| 676 "ca-ES", | |
| 677 L"La missi\x00F3 de Google \x00E9s organitzar la informaci\x00F3 " | |
| 678 L"del m\x00F3n i fer que sigui \x00FAtil i accessible universalment." | |
| 679 }, { | |
| 680 // Czech | |
| 681 "cs-CZ", | |
| 682 L"Posl\x00E1n\x00EDm spole\x010Dnosti Google je " | |
| 683 L"uspo\x0159\x00E1\x0064\x0061t informace z cel\x00E9ho sv\x011Bta " | |
| 684 L"tak, aby byly v\x0161\x0065obecn\x011B p\x0159\x00EDstupn\x00E9 " | |
| 685 L"a u\x017Eite\x010Dn\x00E9." | |
| 686 }, { | |
| 687 // Danish | |
| 688 "da-DK", | |
| 689 L"Googles " | |
| 690 L"mission er at organisere verdens information og g\x00F8re den " | |
| 691 L"almindeligt tilg\x00E6ngelig og nyttig." | |
| 692 }, { | |
| 693 // German | |
| 694 "de-DE", | |
| 695 L"Das Ziel von Google besteht darin, die auf der Welt vorhandenen " | |
| 696 L"Informationen zu organisieren und allgemein zug\x00E4nglich und " | |
| 697 L"nutzbar zu machen." | |
| 698 }, { | |
| 699 // Greek | |
| 700 "el-GR", | |
| 701 L"\x0391\x03C0\x03BF\x03C3\x03C4\x03BF\x03BB\x03AE " | |
| 702 L"\x03C4\x03B7\x03C2 Google \x03B5\x03AF\x03BD\x03B1\x03B9 " | |
| 703 L"\x03BD\x03B1 \x03BF\x03C1\x03B3\x03B1\x03BD\x03CE\x03BD\x03B5\x03B9 " | |
| 704 L"\x03C4\x03B9\x03C2 " | |
| 705 L"\x03C0\x03BB\x03B7\x03C1\x03BF\x03C6\x03BF\x03C1\x03AF\x03B5\x03C2 " | |
| 706 L"\x03C4\x03BF\x03C5 \x03BA\x03CC\x03C3\x03BC\x03BF\x03C5 " | |
| 707 L"\x03BA\x03B1\x03B9 \x03BD\x03B1 \x03C4\x03B9\x03C2 " | |
| 708 L"\x03BA\x03B1\x03B8\x03B9\x03C3\x03C4\x03AC " | |
| 709 L"\x03C0\x03C1\x03BF\x03C3\x03B2\x03AC\x03C3\x03B9\x03BC\x03B5\x03C2 " | |
| 710 L"\x03BA\x03B1\x03B9 \x03C7\x03C1\x03AE\x03C3\x03B9\x03BC\x03B5\x03C2." | |
| 711 }, { | |
| 712 // English (Australia) | |
| 713 "en-AU", | |
| 714 // L"Google's " - to be added. | |
| 715 L"mission is to organise the world's information and make it " | |
| 716 L"universally accessible and useful." | |
| 717 }, { | |
| 718 // English (United Kingdom) | |
| 719 "en-GB", | |
| 720 // L"Google's " - to be added. | |
| 721 L"mission is to organise the world's information and make it " | |
| 722 L"universally accessible and useful." | |
| 723 }, { | |
| 724 // English (United States) | |
| 725 "en-US", | |
| 726 L"Google's mission is to organize the world's information and make it " | |
| 727 L"universally accessible and useful." | |
| 728 }, { | |
| 729 // Spanish | |
| 730 "es-ES", | |
| 731 L"La misi\x00F3n de " | |
| 732 // L"Google" - to be added. | |
| 733 L" es organizar la informaci\x00F3n mundial " | |
| 734 L"para que resulte universalmente accesible y \x00FAtil." | |
| 735 }, { | |
| 736 // Estonian | |
| 737 "et-EE", | |
| 738 // L"Google'ile " - to be added. | |
| 739 L"\x00FClesanne on korraldada maailma teavet ja teeb selle " | |
| 740 L"k\x00F5igile k\x00E4ttesaadavaks ja kasulikuks.", | |
| 741 }, { | |
| 742 // French | |
| 743 "fr-FR", | |
| 744 L"Google a pour mission d'organiser les informations \x00E0 " | |
| 745 L"l'\x00E9\x0063helle mondiale dans le but de les rendre accessibles " | |
| 746 L"et utiles \x00E0 tous." | |
| 747 }, { | |
| 748 // Hebrew | |
| 749 "he-IL", | |
| 750 L"\x05D4\x05DE\x05E9\x05D9\x05DE\x05D4 \x05E9\x05DC Google " | |
| 751 L"\x05D4\x05D9\x05D0 \x05DC\x05D0\x05E8\x05D2\x05DF " | |
| 752 L"\x05D0\x05EA \x05D4\x05DE\x05D9\x05D3\x05E2 " | |
| 753 L"\x05D4\x05E2\x05D5\x05DC\x05DE\x05D9 " | |
| 754 L"\x05D5\x05DC\x05D4\x05E4\x05D5\x05DA \x05D0\x05D5\x05EA\x05D5 " | |
| 755 L"\x05DC\x05D6\x05DE\x05D9\x05DF " | |
| 756 L"\x05D5\x05E9\x05D9\x05DE\x05D5\x05E9\x05D9 \x05D1\x05DB\x05DC " | |
| 757 L"\x05D4\x05E2\x05D5\x05DC\x05DD." | |
| 758 }, { | |
| 759 // Hindi | |
| 760 "hi-IN", | |
| 761 L"Google \x0915\x093E \x092E\x093F\x0936\x0928 " | |
| 762 L"\x0926\x0941\x0928\x093F\x092F\x093E \x0915\x0940 " | |
| 763 L"\x091C\x093E\x0928\x0915\x093E\x0930\x0940 \x0915\x094B " | |
| 764 L"\x0935\x094D\x092F\x0935\x0938\x094D\x0925\x093F\x0924 " | |
| 765 L"\x0915\x0930\x0928\x093E \x0914\x0930 \x0909\x0938\x0947 " | |
| 766 L"\x0938\x093E\x0930\x094D\x0935\x092D\x094C\x092E\x093F\x0915 " | |
| 767 L"\x0930\x0942\x092A \x0938\x0947 \x092A\x0939\x0941\x0901\x091A " | |
| 768 L"\x092E\x0947\x0902 \x0914\x0930 \x0909\x092A\x092F\x094B\x0917\x0940 " | |
| 769 L"\x092C\x0928\x093E\x0928\x093E \x0939\x0948." | |
| 770 }, { | |
| 771 // Croatian | |
| 772 "hr-HR", | |
| 773 // L"Googleova " - to be added. | |
| 774 L"je misija organizirati svjetske informacije i u\x010Diniti ih " | |
| 775 // L"univerzalno " - to be added. | |
| 776 L"pristupa\x010Dnima i korisnima." | |
| 777 }, { | |
| 778 // Indonesian | |
| 779 "id-ID", | |
| 780 L"Misi Google adalah untuk mengelola informasi dunia dan membuatnya " | |
| 781 L"dapat diakses dan bermanfaat secara universal." | |
| 782 }, { | |
| 783 // Italian | |
| 784 "it-IT", | |
| 785 L"La missione di Google \x00E8 organizzare le informazioni a livello " | |
| 786 L"mondiale e renderle universalmente accessibili e fruibili." | |
| 787 }, { | |
| 788 // Lithuanian | |
| 789 "lt-LT", | |
| 790 L"\x201EGoogle\x201C tikslas \x2013 rinkti ir sisteminti pasaulio " | |
| 791 L"informacij\x0105 bei padaryti j\x0105 prieinam\x0105 ir " | |
| 792 L"nauding\x0105 visiems." | |
| 793 }, { | |
| 794 // Latvian | |
| 795 "lv-LV", | |
| 796 L"Google uzdevums ir k\x0101rtot pasaules inform\x0101" | |
| 797 L"ciju un padar\x012Bt to univers\x0101li pieejamu un noder\x012Bgu." | |
| 798 }, { | |
| 799 // Norwegian | |
| 800 "nb-NO", | |
| 801 // L"Googles " - to be added. | |
| 802 L"m\x00E5l er \x00E5 organisere informasjonen i verden og " | |
| 803 L"gj\x00F8re den tilgjengelig og nyttig for alle." | |
| 804 }, { | |
| 805 // Dutch | |
| 806 "nl-NL", | |
| 807 L"Het doel van Google is om alle informatie wereldwijd toegankelijk " | |
| 808 L"en bruikbaar te maken." | |
| 809 }, { | |
| 810 // Polish | |
| 811 "pl-PL", | |
| 812 L"Misj\x0105 Google jest uporz\x0105" L"dkowanie \x015Bwiatowych " | |
| 813 L"zasob\x00F3w informacji, aby sta\x0142y si\x0119 one powszechnie " | |
| 814 L"dost\x0119pne i u\x017Cyteczne." | |
| 815 }, { | |
| 816 // Portuguese (Brazil) | |
| 817 "pt-BR", | |
| 818 L"A miss\x00E3o do " | |
| 819 #if !defined(OS_MACOSX) | |
| 820 L"Google " | |
| 821 #endif | |
| 822 L"\x00E9 organizar as informa\x00E7\x00F5" | |
| 823 L"es do mundo todo e " | |
| 824 #if !defined(OS_MACOSX) | |
| 825 L"torn\x00E1-las " | |
| 826 #endif | |
| 827 L"acess\x00EDveis e " | |
| 828 // L"\x00FAteis " - to be added. | |
| 829 L"em car\x00E1ter universal." | |
| 830 }, { | |
| 831 // Portuguese (Portugal) | |
| 832 "pt-PT", | |
| 833 L"O " | |
| 834 #if !defined(OS_MACOSX) | |
| 835 L"Google " | |
| 836 #endif | |
| 837 L"tem por miss\x00E3o organizar a informa\x00E7\x00E3o do " | |
| 838 L"mundo e " | |
| 839 #if !defined(OS_MACOSX) | |
| 840 L"torn\x00E1-la " | |
| 841 #endif | |
| 842 L"universalmente acess\x00EDvel e \x00FAtil" | |
| 843 }, { | |
| 844 // Romanian | |
| 845 "ro-RO", | |
| 846 L"Misiunea Google este de " | |
| 847 // L"a " - to be added. | |
| 848 L"organiza informa\x0163iile lumii \x015Fi de " | |
| 849 // L"a " - to be added. | |
| 850 L"le face accesibile \x015Fi utile la nivel universal." | |
| 851 }, { | |
| 852 // Russian | |
| 853 "ru-RU", | |
| 854 L"\x041C\x0438\x0441\x0441\x0438\x044F Google " | |
| 855 L"\x0441\x043E\x0441\x0442\x043E\x0438\x0442 \x0432 " | |
| 856 L"\x043E\x0440\x0433\x0430\x043D\x0438\x0437\x0430\x0446\x0438\x0438 " | |
| 857 L"\x043C\x0438\x0440\x043E\x0432\x043E\x0439 " | |
| 858 L"\x0438\x043D\x0444\x043E\x0440\x043C\x0430\x0446\x0438\x0438, " | |
| 859 L"\x043E\x0431\x0435\x0441\x043F\x0435\x0447\x0435\x043D\x0438\x0438 " | |
| 860 L"\x0435\x0435 " | |
| 861 L"\x0434\x043E\x0441\x0442\x0443\x043F\x043D\x043E\x0441\x0442\x0438 " | |
| 862 L"\x0438 \x043F\x043E\x043B\x044C\x0437\x044B \x0434\x043B\x044F " | |
| 863 L"\x0432\x0441\x0435\x0445." | |
| 864 }, { | |
| 865 // Slovak | |
| 866 "sk-SK", | |
| 867 L"Spolo\x010Dnos\x0165 Google si dala za \x00FAlohu usporiada\x0165 " | |
| 868 L"inform\x00E1\x0063ie " | |
| 869 L"z cel\x00E9ho sveta a zabezpe\x010Di\x0165, " | |
| 870 L"aby boli v\x0161eobecne dostupn\x00E9 a u\x017Eito\x010Dn\x00E9." | |
| 871 }, { | |
| 872 // Slovenian | |
| 873 "sl-SI", | |
| 874 // L"Googlovo " - to be added. | |
| 875 L"poslanstvo je organizirati svetovne informacije in " | |
| 876 L"omogo\x010Diti njihovo dostopnost in s tem uporabnost za vse." | |
| 877 }, { | |
| 878 // Swedish | |
| 879 "sv-SE", | |
| 880 L"Googles m\x00E5ls\x00E4ttning \x00E4r att ordna v\x00E4rldens " | |
| 881 L"samlade information och g\x00F6ra den tillg\x00E4nglig f\x00F6r alla." | |
| 882 }, { | |
| 883 // Turkish | |
| 884 "tr-TR", | |
| 885 // L"Google\x2019\x0131n " - to be added. | |
| 886 L"misyonu, d\x00FCnyadaki t\x00FCm bilgileri " | |
| 887 L"organize etmek ve evrensel olarak eri\x015Filebilir ve " | |
| 888 L"kullan\x0131\x015Fl\x0131 k\x0131lmakt\x0131r." | |
| 889 }, { | |
| 890 // Vietnamese | |
| 891 "vi-VN", | |
| 892 L"Nhi\x1EC7m v\x1EE5 c\x1EE7\x0061 " | |
| 893 L"Google la \x0111\x1EC3 t\x1ED5 ch\x1EE9\x0063 " | |
| 894 L"c\x00E1\x0063 th\x00F4ng tin c\x1EE7\x0061 " | |
| 895 L"th\x1EBF gi\x1EDBi va l\x00E0m cho n\x00F3 universal c\x00F3 " | |
| 896 L"th\x1EC3 truy c\x1EADp va h\x1EEFu d\x1EE5ng h\x01A1n." | |
| 897 }, | |
| 898 }; | |
| 899 | |
| 900 FilePath hunspell_directory = GetHunspellDirectory(); | |
| 901 ASSERT_FALSE(hunspell_directory.empty()); | |
| 902 | |
| 903 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
| 904 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
| 905 hunspell_directory, kTestCases[i].language, NULL, FilePath())); | |
| 906 spell_checker->Initialize(); | |
| 907 message_loop_.RunAllPending(); | |
| 908 | |
| 909 size_t input_length = 0; | |
| 910 if (kTestCases[i].input != NULL) | |
| 911 input_length = wcslen(kTestCases[i].input); | |
| 912 | |
| 913 int misspelling_start = 0; | |
| 914 int misspelling_length = 0; | |
| 915 bool result = spell_checker->SpellCheckWord( | |
| 916 WideToUTF16(kTestCases[i].input).c_str(), | |
| 917 static_cast<int>(input_length), | |
| 918 0, | |
| 919 &misspelling_start, | |
| 920 &misspelling_length, NULL); | |
| 921 | |
| 922 EXPECT_EQ(true, result) << kTestCases[i].language; | |
| 923 EXPECT_EQ(0, misspelling_start); | |
| 924 EXPECT_EQ(0, misspelling_length); | |
| 925 } | |
| 926 } | |
| 927 | |
| 928 // This test Adds words to the SpellChecker and veifies that it remembers them. | |
| 929 TEST_F(SpellCheckTest, DISABLED_SpellCheckAddToDictionary_EN_US) { | |
| 930 static const struct { | |
| 931 // A string to be added to SpellChecker. | |
| 932 const wchar_t* word_to_add; | |
| 933 } kTestCases[] = { // Words to be added to the SpellChecker. | |
| 934 {L"Googley"}, | |
| 935 {L"Googleplex"}, | |
| 936 {L"Googler"}, | |
| 937 }; | |
| 938 | |
| 939 FilePath custom_dictionary_file(kTempCustomDictionaryFile); | |
| 940 FilePath hunspell_directory = GetHunspellDirectory(); | |
| 941 ASSERT_FALSE(hunspell_directory.empty()); | |
| 942 | |
| 943 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
| 944 hunspell_directory, "en-US", NULL, custom_dictionary_file)); | |
| 945 spell_checker->Initialize(); | |
| 946 message_loop_.RunAllPending(); | |
| 947 | |
| 948 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
| 949 // Add the word to spellchecker. | |
| 950 spell_checker->AddWord(WideToUTF16(kTestCases[i].word_to_add)); | |
| 951 | |
| 952 // Now check whether it is added to Spellchecker. | |
| 953 std::vector<string16> suggestions; | |
| 954 size_t input_length = 0; | |
| 955 if (kTestCases[i].word_to_add != NULL) { | |
| 956 input_length = wcslen(kTestCases[i].word_to_add); | |
| 957 } | |
| 958 int misspelling_start; | |
| 959 int misspelling_length; | |
| 960 bool result = spell_checker->SpellCheckWord( | |
| 961 WideToUTF16(kTestCases[i].word_to_add).c_str(), | |
| 962 static_cast<int>(input_length), | |
| 963 0, | |
| 964 &misspelling_start, | |
| 965 &misspelling_length, | |
| 966 &suggestions); | |
| 967 | |
| 968 // Check for spelling. | |
| 969 EXPECT_TRUE(result); | |
| 970 } | |
| 971 | |
| 972 // Now initialize another spellchecker to see that AddToWord is permanent. | |
| 973 scoped_refptr<SpellChecker> spell_checker_new(new SpellChecker( | |
| 974 hunspell_directory, "en-US", NULL, custom_dictionary_file)); | |
| 975 spell_checker->Initialize(); | |
| 976 message_loop_.RunAllPending(); | |
| 977 | |
| 978 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
| 979 // Now check whether it is added to Spellchecker. | |
| 980 std::vector<string16> suggestions; | |
| 981 size_t input_length = 0; | |
| 982 if (kTestCases[i].word_to_add != NULL) { | |
| 983 input_length = wcslen(kTestCases[i].word_to_add); | |
| 984 } | |
| 985 int misspelling_start; | |
| 986 int misspelling_length; | |
| 987 bool result = spell_checker_new->SpellCheckWord( | |
| 988 WideToUTF16(kTestCases[i].word_to_add).c_str(), | |
| 989 static_cast<int>(input_length), | |
| 990 0, | |
| 991 &misspelling_start, | |
| 992 &misspelling_length, | |
| 993 &suggestions); | |
| 994 | |
| 995 // Check for spelling. | |
| 996 EXPECT_TRUE(result); | |
| 997 } | |
| 998 | |
| 999 // Remove the temp custom dictionary file. | |
| 1000 file_util::Delete(custom_dictionary_file, false); | |
| 1001 } | |
| 1002 | |
| 1003 // SpellChecker should suggest custome words for misspelled words. | |
| 1004 TEST_F(SpellCheckTest, DISABLED_SpellCheckSuggestionsAddToDictionary_EN_US) { | |
| 1005 static const struct { | |
| 1006 // A string to be added to SpellChecker. | |
| 1007 const wchar_t* word_to_add; | |
| 1008 } kTestCases[] = { // word to be added to SpellChecker | |
| 1009 {L"Googley"}, | |
| 1010 {L"Googleplex"}, | |
| 1011 {L"Googler"}, | |
| 1012 }; | |
| 1013 | |
| 1014 FilePath custom_dictionary_file(kTempCustomDictionaryFile); | |
| 1015 FilePath hunspell_directory = GetHunspellDirectory(); | |
| 1016 ASSERT_FALSE(hunspell_directory.empty()); | |
| 1017 | |
| 1018 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
| 1019 hunspell_directory, "en-US", NULL, custom_dictionary_file)); | |
| 1020 spell_checker->Initialize(); | |
| 1021 message_loop_.RunAllPending(); | |
| 1022 | |
| 1023 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
| 1024 // Add the word to spellchecker. | |
| 1025 spell_checker->AddWord(WideToUTF16(kTestCases[i].word_to_add)); | |
| 1026 } | |
| 1027 | |
| 1028 // Now check to see whether the custom words are suggested for | |
| 1029 // misspelled but similar words. | |
| 1030 static const struct { | |
| 1031 // A string to be tested. | |
| 1032 const wchar_t* input; | |
| 1033 // An expected result for this test case. | |
| 1034 // * true: the input string does not have any invalid words. | |
| 1035 // * false: the input string has one or more invalid words. | |
| 1036 bool expected_result; | |
| 1037 // The position and the length of the first invalid word. | |
| 1038 int misspelling_start; | |
| 1039 int misspelling_length; | |
| 1040 | |
| 1041 // A suggested word that should occur. | |
| 1042 const wchar_t* suggested_word; | |
| 1043 } kTestCasesToBeTested[] = { | |
| 1044 {L"oogley", false, 0, 0, L"Googley"}, | |
| 1045 {L"oogler", false, 0, 0, L"Googler"}, | |
| 1046 {L"oogleplex", false, 0, 0, L"Googleplex"}, | |
| 1047 }; | |
| 1048 | |
| 1049 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCasesToBeTested); ++i) { | |
| 1050 std::vector<string16> suggestions; | |
| 1051 size_t input_length = 0; | |
| 1052 if (kTestCasesToBeTested[i].input != NULL) { | |
| 1053 input_length = wcslen(kTestCasesToBeTested[i].input); | |
| 1054 } | |
| 1055 int misspelling_start; | |
| 1056 int misspelling_length; | |
| 1057 bool result = spell_checker->SpellCheckWord( | |
| 1058 WideToUTF16(kTestCasesToBeTested[i].input).c_str(), | |
| 1059 static_cast<int>(input_length), | |
| 1060 0, | |
| 1061 &misspelling_start, | |
| 1062 &misspelling_length, | |
| 1063 &suggestions); | |
| 1064 | |
| 1065 // Check for spelling. | |
| 1066 EXPECT_EQ(result, kTestCasesToBeTested[i].expected_result); | |
| 1067 | |
| 1068 // Check if the suggested words occur. | |
| 1069 bool suggested_word_is_present = false; | |
| 1070 for (int j=0; j < static_cast<int>(suggestions.size()); j++) { | |
| 1071 if (suggestions.at(j).compare( | |
| 1072 WideToUTF16(kTestCasesToBeTested[i].suggested_word)) == | |
| 1073 0) { | |
| 1074 suggested_word_is_present = true; | |
| 1075 break; | |
| 1076 } | |
| 1077 } | |
| 1078 | |
| 1079 EXPECT_TRUE(suggested_word_is_present); | |
| 1080 } | |
| 1081 | |
| 1082 // Remove the temp custom dictionary file. | |
| 1083 file_util::Delete(custom_dictionary_file, false); | |
| 1084 } | |
| 1085 | |
| 1086 TEST_F(SpellCheckTest, GetAutoCorrectionWord_EN_US) { | |
| 1087 static const struct { | |
| 1088 // A misspelled word. | |
| 1089 const char* input; | |
| 1090 | |
| 1091 // An expected result for this test case. | |
| 1092 // Should be an empty string if there are no suggestions for auto correct. | |
| 1093 const char* expected_result; | |
| 1094 } kTestCases[] = { | |
| 1095 {"teh", "the"}, | |
| 1096 {"moer", "more"}, | |
| 1097 {"watre", "water"}, | |
| 1098 {"noen", ""}, | |
| 1099 {"what", ""}, | |
| 1100 }; | |
| 1101 | |
| 1102 FilePath hunspell_directory = GetHunspellDirectory(); | |
| 1103 ASSERT_FALSE(hunspell_directory.empty()); | |
| 1104 | |
| 1105 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
| 1106 hunspell_directory, "en-US", NULL, FilePath())); | |
| 1107 spell_checker->EnableAutoSpellCorrect(true); | |
| 1108 spell_checker->Initialize(); | |
| 1109 message_loop_.RunAllPending(); | |
| 1110 | |
| 1111 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
| 1112 string16 misspelled_word(UTF8ToUTF16(kTestCases[i].input)); | |
| 1113 string16 expected_autocorrect_word( | |
| 1114 UTF8ToUTF16(kTestCases[i].expected_result)); | |
| 1115 string16 autocorrect_word = spell_checker->GetAutoCorrectionWord( | |
| 1116 misspelled_word, 0); | |
| 1117 | |
| 1118 // Check for spelling. | |
| 1119 EXPECT_EQ(expected_autocorrect_word, autocorrect_word); | |
| 1120 } | |
| 1121 } | |
| 1122 | |
| 1123 #if defined(OS_MACOSX) | |
| 1124 // Tests that words are properly ignored. Currently only enabled on OS X as it | |
| 1125 // is the only platform to support ignoring words. Note that in this test, we | |
| 1126 // supply a non-zero doc_tag, in order to test that ignored words are matched to | |
| 1127 // the correct document. | |
| 1128 TEST_F(SpellCheckTest, IgnoreWords_EN_US) { | |
| 1129 static const struct { | |
| 1130 // A misspelled word. | |
| 1131 const char* input; | |
| 1132 bool input_result; | |
| 1133 } kTestCases[] = { | |
| 1134 {"teh", false}, | |
| 1135 {"moer", false}, | |
| 1136 {"watre", false}, | |
| 1137 {"noen", false}, | |
| 1138 }; | |
| 1139 | |
| 1140 FilePath hunspell_directory = GetHunspellDirectory(); | |
| 1141 ASSERT_FALSE(hunspell_directory.empty()); | |
| 1142 | |
| 1143 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
| 1144 hunspell_directory, "en-US", NULL, FilePath())); | |
| 1145 spell_checker->Initialize(); | |
| 1146 message_loop_.RunAllPending(); | |
| 1147 | |
| 1148 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
| 1149 string16 word(UTF8ToUTF16(kTestCases[i].input)); | |
| 1150 std::vector<string16> suggestions; | |
| 1151 size_t input_length = 0; | |
| 1152 if (kTestCases[i].input != NULL) { | |
| 1153 input_length = word.length(); | |
| 1154 } | |
| 1155 int misspelling_start; | |
| 1156 int misspelling_length; | |
| 1157 | |
| 1158 int doc_tag = SpellCheckerPlatform::GetDocumentTag(); | |
| 1159 bool result = spell_checker->SpellCheckWord(word.c_str(), | |
| 1160 static_cast<int>(input_length), | |
| 1161 doc_tag, | |
| 1162 &misspelling_start, | |
| 1163 &misspelling_length, | |
| 1164 &suggestions); | |
| 1165 | |
| 1166 // The word should show up as misspelled. | |
| 1167 EXPECT_EQ(kTestCases[i].input_result, result); | |
| 1168 | |
| 1169 // Ignore the word. | |
| 1170 SpellCheckerPlatform::IgnoreWord(word); | |
| 1171 | |
| 1172 // Spellcheck again. | |
| 1173 result = spell_checker->SpellCheckWord(word.c_str(), | |
| 1174 static_cast<int>(input_length), | |
| 1175 doc_tag, | |
| 1176 &misspelling_start, | |
| 1177 &misspelling_length, | |
| 1178 &suggestions); | |
| 1179 | |
| 1180 // The word should now show up as correctly spelled. | |
| 1181 EXPECT_EQ(!(kTestCases[i].input_result), result); | |
| 1182 | |
| 1183 // Close the docuemnt. Any words that we had previously ignored should no | |
| 1184 // longer be ignored and thus should show up as misspelled. | |
| 1185 SpellCheckerPlatform::CloseDocumentWithTag(doc_tag); | |
| 1186 | |
| 1187 // Spellcheck one more time. | |
| 1188 result = spell_checker->SpellCheckWord(word.c_str(), | |
| 1189 static_cast<int>(input_length), | |
| 1190 doc_tag, | |
| 1191 &misspelling_start, | |
| 1192 &misspelling_length, | |
| 1193 &suggestions); | |
| 1194 | |
| 1195 // The word should now show be spelled wrong again | |
| 1196 EXPECT_EQ(kTestCases[i].input_result, result); | |
| 1197 } | |
| 1198 } // Test IgnoreWords_EN_US | |
| 1199 #endif // OS_MACOSX | |
| OLD | NEW |