OLD | NEW |
| (Empty) |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "webkit/glue/webkit_glue.h" | |
6 | |
7 #include "base/file_util.h" | |
8 #include "base/message_loop.h" | |
9 #include "base/path_service.h" | |
10 #include "base/sys_string_conversions.h" | |
11 #include "chrome/browser/chrome_thread.h" | |
12 #include "chrome/browser/spellchecker.h" | |
13 #include "chrome/browser/spellchecker_platform_engine.h" | |
14 #include "chrome/common/chrome_paths.h" | |
15 #include "testing/gtest/include/gtest/gtest.h" | |
16 | |
17 namespace { | |
18 const FilePath::CharType kTempCustomDictionaryFile[] = | |
19 FILE_PATH_LITERAL("temp_custom_dictionary.txt"); | |
20 } // namespace | |
21 | |
22 class SpellCheckTest : public testing::Test { | |
23 public: | |
24 SpellCheckTest() | |
25 : file_thread_(ChromeThread::FILE, &message_loop_), | |
26 io_thread_(ChromeThread::IO, &message_loop_) {} | |
27 | |
28 protected: | |
29 MessageLoop message_loop_; | |
30 | |
31 private: | |
32 ChromeThread file_thread_; | |
33 ChromeThread io_thread_; // To keep DCHECKs inside spell checker happy. | |
34 }; | |
35 | |
// Special initialization entry point that exists only for the unit tests in
// this file. It is declared here and defined in another translation unit.
// Judging by the parameter names, the two handles are the Hunspell affix
// file and the Hunspell dictionary file -- confirm against the definition.
void InitHunspellWithFiles(FILE* file_aff_hunspell, FILE* file_dic_hunspell);
40 | |
41 FilePath GetHunspellDirectory() { | |
42 FilePath hunspell_directory; | |
43 if (!PathService::Get(base::DIR_SOURCE_ROOT, &hunspell_directory)) | |
44 return FilePath(); | |
45 | |
46 hunspell_directory = hunspell_directory.AppendASCII("third_party"); | |
47 hunspell_directory = hunspell_directory.AppendASCII("hunspell"); | |
48 hunspell_directory = hunspell_directory.AppendASCII("dictionaries"); | |
49 return hunspell_directory; | |
50 } | |
51 | |
52 // Operates unit tests for the webkit_glue::SpellCheckWord() function | |
53 // with the US English dictionary. | |
54 // The unit tests in this function consist of: | |
55 // * Tests for the function with empty strings; | |
56 // * Tests for the function with a valid English word; | |
57 // * Tests for the function with a valid non-English word; | |
58 // * Tests for the function with a valid English word with a preceding | |
59 // space character; | |
60 // * Tests for the function with a valid English word with a preceding | |
61 // non-English word; | |
62 // * Tests for the function with a valid English word with a following | |
63 // space character; | |
64 // * Tests for the function with a valid English word with a following | |
65 // non-English word; | |
66 // * Tests for the function with two valid English words concatenated | |
67 // with space characters or non-English words; | |
68 // * Tests for the function with an invalid English word; | |
69 // * Tests for the function with an invalid English word with a preceding | |
70 // space character; | |
71 // * Tests for the function with an invalid English word with a preceding | |
72 // non-English word; | |
73 // * Tests for the function with2 an invalid English word with a following | |
74 // space character; | |
75 // * Tests for the function with an invalid English word with a following | |
76 // non-English word, and; | |
77 // * Tests for the function with two invalid English words concatenated | |
78 // with space characters or non-English words. | |
79 // A test with a "[ROBUSTNESS]" mark shows it is a robustness test and it uses | |
80 // grammartically incorrect string. | |
81 // TODO(hbono): Please feel free to add more tests. | |
82 TEST_F(SpellCheckTest, SpellCheckStrings_EN_US) { | |
83 static const struct { | |
84 // A string to be tested. | |
85 const wchar_t* input; | |
86 // An expected result for this test case. | |
87 // * true: the input string does not have any invalid words. | |
88 // * false: the input string has one or more invalid words. | |
89 bool expected_result; | |
90 // The position and the length of the first invalid word. | |
91 int misspelling_start; | |
92 int misspelling_length; | |
93 } kTestCases[] = { | |
94 // Empty strings. | |
95 {L"", true, 0, 0}, | |
96 {L" ", true, 0, 0}, | |
97 {L"\xA0", true, 0, 0}, | |
98 {L"\x3000", true, 0, 0}, | |
99 | |
100 // A valid English word "hello". | |
101 {L"hello", true, 0, 0}, | |
102 // A valid Chinese word (meaning "hello") consisiting of two CJKV | |
103 // ideographs | |
104 {L"\x4F60\x597D", true, 0, 0}, | |
105 // A valid Korean word (meaning "hello") consisting of five hangul | |
106 // syllables | |
107 {L"\xC548\xB155\xD558\xC138\xC694", true, 0, 0}, | |
108 // A valid Japanese word (meaning "hello") consisting of five Hiragana | |
109 // letters | |
110 {L"\x3053\x3093\x306B\x3061\x306F", true, 0, 0}, | |
111 // A valid Hindi word (meaning ?) consisting of six Devanagari letters | |
112 // (This word is copied from "http://b/issue?id=857583".) | |
113 {L"\x0930\x093E\x091C\x0927\x093E\x0928", true, 0, 0}, | |
114 // A valid English word "affix" using a Latin ligature 'ffi' | |
115 {L"a\xFB03x", true, 0, 0}, | |
116 // A valid English word "hello" (fullwidth version) | |
117 {L"\xFF28\xFF45\xFF4C\xFF4C\xFF4F", true, 0, 0}, | |
118 // Two valid Greek words (meaning "hello") consisting of seven Greek | |
119 // letters | |
120 {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true, 0, 0}, | |
121 // A valid Russian word (meainng "hello") consisting of twelve Cyrillic | |
122 // letters | |
123 {L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
124 L"\x0442\x0432\x0443\x0439\x0442\x0435", true, 0, 0}, | |
125 // A valid English contraction | |
126 {L"isn't", true, 0, 0}, | |
127 // A valid English word enclosed with underscores. | |
128 {L"_hello_", true, 0, 0}, | |
129 | |
130 // A valid English word with a preceding whitespace | |
131 {L" " L"hello", true, 0, 0}, | |
132 // A valid English word with a preceding no-break space | |
133 {L"\xA0" L"hello", true, 0, 0}, | |
134 // A valid English word with a preceding ideographic space | |
135 {L"\x3000" L"hello", true, 0, 0}, | |
136 // A valid English word with a preceding Chinese word | |
137 {L"\x4F60\x597D" L"hello", true, 0, 0}, | |
138 // [ROBUSTNESS] A valid English word with a preceding Korean word | |
139 {L"\xC548\xB155\xD558\xC138\xC694" L"hello", true, 0, 0}, | |
140 // A valid English word with a preceding Japanese word | |
141 {L"\x3053\x3093\x306B\x3061\x306F" L"hello", true, 0, 0}, | |
142 // [ROBUSTNESS] A valid English word with a preceding Hindi word | |
143 {L"\x0930\x093E\x091C\x0927\x093E\x0928" L"hello", true, 0, 0}, | |
144 // [ROBUSTNESS] A valid English word with two preceding Greek words | |
145 {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5" | |
146 L"hello", true, 0, 0}, | |
147 // [ROBUSTNESS] A valid English word with a preceding Russian word | |
148 {L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
149 L"\x0442\x0432\x0443\x0439\x0442\x0435" L"hello", true, 0, 0}, | |
150 | |
151 // A valid English word with a following whitespace | |
152 {L"hello" L" ", true, 0, 0}, | |
153 // A valid English word with a following no-break space | |
154 {L"hello" L"\xA0", true, 0, 0}, | |
155 // A valid English word with a following ideographic space | |
156 {L"hello" L"\x3000", true, 0, 0}, | |
157 // A valid English word with a following Chinese word | |
158 {L"hello" L"\x4F60\x597D", true, 0, 0}, | |
159 // [ROBUSTNESS] A valid English word with a following Korean word | |
160 {L"hello" L"\xC548\xB155\xD558\xC138\xC694", true, 0, 0}, | |
161 // A valid English word with a following Japanese word | |
162 {L"hello" L"\x3053\x3093\x306B\x3061\x306F", true, 0, 0}, | |
163 // [ROBUSTNESS] A valid English word with a following Hindi word | |
164 {L"hello" L"\x0930\x093E\x091C\x0927\x093E\x0928", true, 0, 0}, | |
165 // [ROBUSTNESS] A valid English word with two following Greek words | |
166 {L"hello" | |
167 L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true, 0, 0}, | |
168 // [ROBUSTNESS] A valid English word with a following Russian word | |
169 {L"hello" L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
170 L"\x0442\x0432\x0443\x0439\x0442\x0435", true, 0, 0}, | |
171 | |
172 // Two valid English words concatenated with a whitespace | |
173 {L"hello" L" " L"hello", true, 0, 0}, | |
174 // Two valid English words concatenated with a no-break space | |
175 {L"hello" L"\xA0" L"hello", true, 0, 0}, | |
176 // Two valid English words concatenated with an ideographic space | |
177 {L"hello" L"\x3000" L"hello", true, 0, 0}, | |
178 // Two valid English words concatenated with a Chinese word | |
179 {L"hello" L"\x4F60\x597D" L"hello", true, 0, 0}, | |
180 // [ROBUSTNESS] Two valid English words concatenated with a Korean word | |
181 {L"hello" L"\xC548\xB155\xD558\xC138\xC694" L"hello", true, 0, 0}, | |
182 // Two valid English words concatenated with a Japanese word | |
183 {L"hello" L"\x3053\x3093\x306B\x3061\x306F" L"hello", true, 0, 0}, | |
184 // [ROBUSTNESS] Two valid English words concatenated with a Hindi word | |
185 {L"hello" L"\x0930\x093E\x091C\x0927\x093E\x0928" L"hello" , true, 0, 0}, | |
186 // [ROBUSTNESS] Two valid English words concatenated with two Greek words | |
187 {L"hello" L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5" | |
188 L"hello", true, 0, 0}, | |
189 // [ROBUSTNESS] Two valid English words concatenated with a Russian word | |
190 {L"hello" L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
191 L"\x0442\x0432\x0443\x0439\x0442\x0435" L"hello", true, 0, 0}, | |
192 // [ROBUSTNESS] Two valid English words concatenated with a contraction | |
193 // character. | |
194 {L"hello:hello", true, 0, 0}, | |
195 | |
196 // An invalid English word | |
197 {L"ifmmp", false, 0, 5}, | |
198 // An invalid English word "bffly" containing a Latin ligature 'ffl' | |
199 {L"b\xFB04y", false, 0, 3}, | |
200 // An invalid English word "ifmmp" (fullwidth version) | |
201 {L"\xFF29\xFF46\xFF4D\xFF4D\xFF50", false, 0, 5}, | |
202 // An invalid English contraction | |
203 {L"jtm'u", false, 0, 5}, | |
204 // An invalid English word enclosed with underscores. | |
205 {L"_ifmmp_", false, 1, 5}, | |
206 | |
207 // An invalid English word with a preceding whitespace | |
208 {L" " L"ifmmp", false, 1, 5}, | |
209 // An invalid English word with a preceding no-break space | |
210 {L"\xA0" L"ifmmp", false, 1, 5}, | |
211 // An invalid English word with a preceding ideographic space | |
212 {L"\x3000" L"ifmmp", false, 1, 5}, | |
213 // An invalid English word with a preceding Chinese word | |
214 {L"\x4F60\x597D" L"ifmmp", false, 2, 5}, | |
215 // [ROBUSTNESS] An invalid English word with a preceding Korean word | |
216 {L"\xC548\xB155\xD558\xC138\xC694" L"ifmmp", false, 5, 5}, | |
217 // An invalid English word with a preceding Japanese word | |
218 {L"\x3053\x3093\x306B\x3061\x306F" L"ifmmp", false, 5, 5}, | |
219 // [ROBUSTNESS] An invalid English word with a preceding Hindi word | |
220 {L"\x0930\x093E\x091C\x0927\x093E\x0928" L"ifmmp", false, 6, 5}, | |
221 // [ROBUSTNESS] An invalid English word with two preceding Greek words | |
222 {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5" | |
223 L"ifmmp", false, 8, 5}, | |
224 // [ROBUSTNESS] An invalid English word with a preceding Russian word | |
225 {L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
226 L"\x0442\x0432\x0443\x0439\x0442\x0435" L"ifmmp", false, 12, 5}, | |
227 | |
228 // An invalid English word with a following whitespace | |
229 {L"ifmmp" L" ", false, 0, 5}, | |
230 // An invalid English word with a following no-break space | |
231 {L"ifmmp" L"\xA0", false, 0, 5}, | |
232 // An invalid English word with a following ideographic space | |
233 {L"ifmmp" L"\x3000", false, 0, 5}, | |
234 // An invalid English word with a following Chinese word | |
235 {L"ifmmp" L"\x4F60\x597D", false, 0, 5}, | |
236 // [ROBUSTNESS] An invalid English word with a following Korean word | |
237 {L"ifmmp" L"\xC548\xB155\xD558\xC138\xC694", false, 0, 5}, | |
238 // An invalid English word with a following Japanese word | |
239 {L"ifmmp" L"\x3053\x3093\x306B\x3061\x306F", false, 0, 5}, | |
240 // [ROBUSTNESS] An invalid English word with a following Hindi word | |
241 {L"ifmmp" L"\x0930\x093E\x091C\x0927\x093E\x0928", false, 0, 5}, | |
242 // [ROBUSTNESS] An invalid English word with two following Greek words | |
243 {L"ifmmp" | |
244 L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", false, 0, 5}, | |
245 // [ROBUSTNESS] An invalid English word with a following Russian word | |
246 {L"ifmmp" L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
247 L"\x0442\x0432\x0443\x0439\x0442\x0435", false, 0, 5}, | |
248 | |
249 // Two invalid English words concatenated with a whitespace | |
250 {L"ifmmp" L" " L"ifmmp", false, 0, 5}, | |
251 // Two invalid English words concatenated with a no-break space | |
252 {L"ifmmp" L"\xA0" L"ifmmp", false, 0, 5}, | |
253 // Two invalid English words concatenated with an ideographic space | |
254 {L"ifmmp" L"\x3000" L"ifmmp", false, 0, 5}, | |
255 // Two invalid English words concatenated with a Chinese word | |
256 {L"ifmmp" L"\x4F60\x597D" L"ifmmp", false, 0, 5}, | |
257 // [ROBUSTNESS] Two invalid English words concatenated with a Korean word | |
258 {L"ifmmp" L"\xC548\xB155\xD558\xC138\xC694" L"ifmmp", false, 0, 5}, | |
259 // Two invalid English words concatenated with a Japanese word | |
260 {L"ifmmp" L"\x3053\x3093\x306B\x3061\x306F" L"ifmmp", false, 0, 5}, | |
261 // [ROBUSTNESS] Two invalid English words concatenated with a Hindi word | |
262 {L"ifmmp" L"\x0930\x093E\x091C\x0927\x093E\x0928" L"ifmmp" , false, 0, 5}, | |
263 // [ROBUSTNESS] Two invalid English words concatenated with two Greek words | |
264 {L"ifmmp" L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5" | |
265 L"ifmmp", false, 0, 5}, | |
266 // [ROBUSTNESS] Two invalid English words concatenated with a Russian word | |
267 {L"ifmmp" L"\x0437\x0434\x0440\x0430\x0432\x0441" | |
268 L"\x0442\x0432\x0443\x0439\x0442\x0435" L"ifmmp", false, 0, 5}, | |
269 // [ROBUSTNESS] Two invalid English words concatenated with a contraction | |
270 // character. | |
271 {L"ifmmp:ifmmp", false, 0, 11}, | |
272 | |
273 // [REGRESSION] Issue 13432: "Any word of 13 or 14 characters is not | |
274 // spellcheck" <http://crbug.com/13432>. | |
275 {L"qwertyuiopasd", false, 0, 13}, | |
276 {L"qwertyuiopasdf", false, 0, 14}, | |
277 }; | |
278 | |
279 FilePath hunspell_directory = GetHunspellDirectory(); | |
280 ASSERT_FALSE(hunspell_directory.empty()); | |
281 | |
282 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
283 hunspell_directory, "en-US", NULL, FilePath())); | |
284 spell_checker->Initialize(); | |
285 message_loop_.RunAllPending(); | |
286 | |
287 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
288 size_t input_length = 0; | |
289 if (kTestCases[i].input != NULL) { | |
290 input_length = wcslen(kTestCases[i].input); | |
291 } | |
292 int misspelling_start; | |
293 int misspelling_length; | |
294 bool result = spell_checker->SpellCheckWord( | |
295 WideToUTF16(kTestCases[i].input).c_str(), | |
296 static_cast<int>(input_length), | |
297 0, | |
298 &misspelling_start, | |
299 &misspelling_length, NULL); | |
300 | |
301 EXPECT_EQ(kTestCases[i].expected_result, result); | |
302 EXPECT_EQ(kTestCases[i].misspelling_start, misspelling_start); | |
303 EXPECT_EQ(kTestCases[i].misspelling_length, misspelling_length); | |
304 } | |
305 } | |
306 | |
307 TEST_F(SpellCheckTest, SpellCheckSuggestions_EN_US) { | |
308 static const struct { | |
309 // A string to be tested. | |
310 const wchar_t* input; | |
311 // An expected result for this test case. | |
312 // * true: the input string does not have any invalid words. | |
313 // * false: the input string has one or more invalid words. | |
314 bool expected_result; | |
315 // The position and the length of the first invalid word. | |
316 int misspelling_start; | |
317 int misspelling_length; | |
318 | |
319 // A suggested word that should occur. | |
320 const wchar_t* suggested_word; | |
321 } kTestCases[] = { // A valid English word with a preceding whitespace | |
322 // We need to have separate test cases here, since hunspell and the OS X | |
323 // spellchecking service occasionally differ on what they consider a valid | |
324 // suggestion for a given word, although these lists could likely be | |
325 // integrated somewhat. | |
326 #if defined(OS_MACOSX) | |
327 // These words come from the wikipedia page of the most commonly | |
328 // misspelled words in english. | |
329 // (http://en.wikipedia.org/wiki/Commonly_misspelled_words). | |
330 {L"absense", false, 0, 0, L"absence"}, | |
331 {L"acceptible", false, 0, 0, L"acceptable"}, | |
332 {L"accidentaly", false, 0, 0, L"accidentally"}, | |
333 {L"accomodate", false, 0, 0, L"accommodate"}, | |
334 {L"acheive", false, 0, 0, L"achieve"}, | |
335 {L"acknowlege", false, 0, 0, L"acknowledge"}, | |
336 {L"acquaintence", false, 0, 0, L"acquaintance"}, | |
337 {L"aquire", false, 0, 0, L"acquire"}, | |
338 {L"aquit", false, 0, 0, L"acquit"}, | |
339 {L"acrage", false, 0, 0, L"acreage"}, | |
340 {L"adress", false, 0, 0, L"address"}, | |
341 {L"adultary", false, 0, 0, L"adultery"}, | |
342 {L"advertize", false, 0, 0, L"advertise"}, | |
343 {L"adviseable", false, 0, 0, L"advisable"}, | |
344 {L"agression", false, 0, 0, L"aggression"}, | |
345 {L"alchohol", false, 0, 0, L"alcohol"}, | |
346 {L"alege", false, 0, 0, L"allege"}, | |
347 {L"allegaince", false, 0, 0, L"allegiance"}, | |
348 {L"allmost", false, 0, 0, L"almost"}, | |
349 // Ideally, this test should pass. It works in firefox, but not in hunspell | |
350 // or OS X. | |
351 // {L"alot", false, 0, 0, L"a lot"}, | |
352 {L"amatuer", false, 0, 0, L"amateur"}, | |
353 {L"ammend", false, 0, 0, L"amend"}, | |
354 {L"amung", false, 0, 0, L"among"}, | |
355 {L"anually", false, 0, 0, L"annually"}, | |
356 {L"apparant", false, 0, 0, L"apparent"}, | |
357 {L"artic", false, 0, 0, L"arctic"}, | |
358 {L"arguement", false, 0, 0, L"argument"}, | |
359 {L"athiest", false, 0, 0, L"atheist"}, | |
360 {L"athelete", false, 0, 0, L"athlete"}, | |
361 {L"avrage", false, 0, 0, L"average"}, | |
362 {L"awfull", false, 0, 0, L"awful"}, | |
363 {L"ballance", false, 0, 0, L"balance"}, | |
364 {L"basicly", false, 0, 0, L"basically"}, | |
365 {L"becuase", false, 0, 0, L"because"}, | |
366 {L"becomeing", false, 0, 0, L"becoming"}, | |
367 {L"befor", false, 0, 0, L"before"}, | |
368 {L"begining", false, 0, 0, L"beginning"}, | |
369 {L"beleive", false, 0, 0, L"believe"}, | |
370 {L"bellweather", false, 0, 0, L"bellwether"}, | |
371 {L"benifit", false, 0, 0, L"benefit"}, | |
372 {L"bouy", false, 0, 0, L"buoy"}, | |
373 {L"briliant", false, 0, 0, L"brilliant"}, | |
374 {L"burgler", false, 0, 0, L"burglar"}, | |
375 {L"camoflage", false, 0, 0, L"camouflage"}, | |
376 {L"carrer", false, 0, 0, L"career"}, | |
377 {L"carefull", false, 0, 0, L"careful"}, | |
378 {L"Carribean", false, 0, 0, L"Caribbean"}, | |
379 {L"catagory", false, 0, 0, L"category"}, | |
380 {L"cauhgt", false, 0, 0, L"caught"}, | |
381 {L"cieling", false, 0, 0, L"ceiling"}, | |
382 {L"cemetary", false, 0, 0, L"cemetery"}, | |
383 {L"certin", false, 0, 0, L"certain"}, | |
384 {L"changable", false, 0, 0, L"changeable"}, | |
385 {L"cheif", false, 0, 0, L"chief"}, | |
386 {L"citezen", false, 0, 0, L"citizen"}, | |
387 {L"collaegue", false, 0, 0, L"colleague"}, | |
388 {L"colum", false, 0, 0, L"column"}, | |
389 {L"comming", false, 0, 0, L"coming"}, | |
390 {L"commited", false, 0, 0, L"committed"}, | |
391 {L"compitition", false, 0, 0, L"competition"}, | |
392 {L"conceed", false, 0, 0, L"concede"}, | |
393 {L"congradulate", false, 0, 0, L"congratulate"}, | |
394 {L"consciencious", false, 0, 0, L"conscientious"}, | |
395 {L"concious", false, 0, 0, L"conscious"}, | |
396 {L"concensus", false, 0, 0, L"consensus"}, | |
397 {L"contraversy", false, 0, 0, L"controversy"}, | |
398 {L"conveniance", false, 0, 0, L"convenience"}, | |
399 {L"critecize", false, 0, 0, L"criticize"}, | |
400 {L"dacquiri", false, 0, 0, L"daiquiri"}, | |
401 {L"decieve", false, 0, 0, L"deceive"}, | |
402 {L"dicide", false, 0, 0, L"decide"}, | |
403 {L"definate", false, 0, 0, L"definite"}, | |
404 {L"definitly", false, 0, 0, L"definitely"}, | |
405 {L"deposite", false, 0, 0, L"deposit"}, | |
406 {L"desparate", false, 0, 0, L"desperate"}, | |
407 {L"develope", false, 0, 0, L"develop"}, | |
408 {L"diffrence", false, 0, 0, L"difference"}, | |
409 {L"dilema", false, 0, 0, L"dilemma"}, | |
410 {L"disapear", false, 0, 0, L"disappear"}, | |
411 {L"disapoint", false, 0, 0, L"disappoint"}, | |
412 {L"disasterous", false, 0, 0, L"disastrous"}, | |
413 {L"disipline", false, 0, 0, L"discipline"}, | |
414 {L"drunkeness", false, 0, 0, L"drunkenness"}, | |
415 {L"dumbell", false, 0, 0, L"dumbbell"}, | |
416 {L"durring", false, 0, 0, L"during"}, | |
417 {L"easely", false, 0, 0, L"easily"}, | |
418 {L"eigth", false, 0, 0, L"eight"}, | |
419 {L"embarass", false, 0, 0, L"embarrass"}, | |
420 {L"enviroment", false, 0, 0, L"environment"}, | |
421 {L"equiped", false, 0, 0, L"equipped"}, | |
422 {L"equiptment", false, 0, 0, L"equipment"}, | |
423 {L"exagerate", false, 0, 0, L"exaggerate"}, | |
424 {L"excede", false, 0, 0, L"exceed"}, | |
425 {L"exellent", false, 0, 0, L"excellent"}, | |
426 {L"exsept", false, 0, 0, L"except"}, | |
427 {L"exercize", false, 0, 0, L"exercise"}, | |
428 {L"exilerate", false, 0, 0, L"exhilarate"}, | |
429 {L"existance", false, 0, 0, L"existence"}, | |
430 {L"experiance", false, 0, 0, L"experience"}, | |
431 {L"experament", false, 0, 0, L"experiment"}, | |
432 {L"explaination", false, 0, 0, L"explanation"}, | |
433 {L"extreem", false, 0, 0, L"extreme"}, | |
434 {L"familier", false, 0, 0, L"familiar"}, | |
435 {L"facinating", false, 0, 0, L"fascinating"}, | |
436 {L"firey", false, 0, 0, L"fiery"}, | |
437 {L"finaly", false, 0, 0, L"finally"}, | |
438 {L"flourescent", false, 0, 0, L"fluorescent"}, | |
439 {L"foriegn", false, 0, 0, L"foreign"}, | |
440 {L"fourty", false, 0, 0, L"forty"}, | |
441 {L"foreward", false, 0, 0, L"forward"}, | |
442 {L"freind", false, 0, 0, L"friend"}, | |
443 {L"fullfil", false, 0, 0, L"fulfill"}, | |
444 {L"fundemental", false, 0, 0, L"fundamental"}, | |
445 {L"guage", false, 0, 0, L"gauge"}, | |
446 {L"generaly", false, 0, 0, L"generally"}, | |
447 {L"goverment", false, 0, 0, L"government"}, | |
448 {L"grammer", false, 0, 0, L"grammar"}, | |
449 {L"gratefull", false, 0, 0, L"grateful"}, | |
450 {L"garantee", false, 0, 0, L"guarantee"}, | |
451 {L"guidence", false, 0, 0, L"guidance"}, | |
452 {L"happyness", false, 0, 0, L"happiness"}, | |
453 {L"harrass", false, 0, 0, L"harass"}, | |
454 {L"heighth", false, 0, 0, L"height"}, | |
455 {L"heirarchy", false, 0, 0, L"hierarchy"}, | |
456 {L"humerous", false, 0, 0, L"humorous"}, | |
457 {L"hygene", false, 0, 0, L"hygiene"}, | |
458 {L"hipocrit", false, 0, 0, L"hypocrite"}, | |
459 {L"idenity", false, 0, 0, L"identity"}, | |
460 {L"ignorence", false, 0, 0, L"ignorance"}, | |
461 {L"imaginery", false, 0, 0, L"imaginary"}, | |
462 {L"immitate", false, 0, 0, L"imitate"}, | |
463 {L"immitation", false, 0, 0, L"imitation"}, | |
464 {L"imediately", false, 0, 0, L"immediately"}, | |
465 {L"incidently", false, 0, 0, L"incidentally"}, | |
466 {L"independant", false, 0, 0, L"independent"}, | |
467 {L"indispensible", false, 0, 0, L"indispensable"}, | |
468 {L"innoculate", false, 0, 0, L"inoculate"}, | |
469 {L"inteligence", false, 0, 0, L"intelligence"}, | |
470 {L"intresting", false, 0, 0, L"interesting"}, | |
471 {L"interuption", false, 0, 0, L"interruption"}, | |
472 {L"irrelevent", false, 0, 0, L"irrelevant"}, | |
473 {L"irritible", false, 0, 0, L"irritable"}, | |
474 {L"iland", false, 0, 0, L"island"}, | |
475 {L"jellous", false, 0, 0, L"jealous"}, | |
476 {L"knowlege", false, 0, 0, L"knowledge"}, | |
477 {L"labratory", false, 0, 0, L"laboratory"}, | |
478 {L"liesure", false, 0, 0, L"leisure"}, | |
479 {L"lenght", false, 0, 0, L"length"}, | |
480 {L"liason", false, 0, 0, L"liaison"}, | |
481 {L"libary", false, 0, 0, L"library"}, | |
482 {L"lisence", false, 0, 0, L"license"}, | |
483 {L"lonelyness", false, 0, 0, L"loneliness"}, | |
484 {L"lieing", false, 0, 0, L"lying"}, | |
485 {L"maintenence", false, 0, 0, L"maintenance"}, | |
486 {L"manuever", false, 0, 0, L"maneuver"}, | |
487 {L"marrige", false, 0, 0, L"marriage"}, | |
488 {L"mathmatics", false, 0, 0, L"mathematics"}, | |
489 {L"medcine", false, 0, 0, L"medicine"}, | |
490 {L"medeval", false, 0, 0, L"medieval"}, | |
491 {L"momento", false, 0, 0, L"memento"}, | |
492 {L"millenium", false, 0, 0, L"millennium"}, | |
493 {L"miniture", false, 0, 0, L"miniature"}, | |
494 {L"minite", false, 0, 0, L"minute"}, | |
495 {L"mischevous", false, 0, 0, L"mischievous"}, | |
496 {L"mispell", false, 0, 0, L"misspell"}, | |
497 // Maybe this one should pass, as it works in hunspell, but not in firefox. | |
498 // {L"misterius", false, 0, 0, L"mysterious"}, | |
499 {L"naturaly", false, 0, 0, L"naturally"}, | |
500 {L"neccessary", false, 0, 0, L"necessary"}, | |
501 {L"neice", false, 0, 0, L"niece"}, | |
502 {L"nieghbor", false, 0, 0, L"neighbor"}, | |
503 {L"nieghbour", false, 0, 0, L"neighbor"}, | |
504 {L"niether", false, 0, 0, L"neither"}, | |
505 {L"noticable", false, 0, 0, L"noticeable"}, | |
506 {L"occassion", false, 0, 0, L"occasion"}, | |
507 {L"occasionaly", false, 0, 0, L"occasionally"}, | |
508 {L"occurrance", false, 0, 0, L"occurrence"}, | |
509 {L"occured", false, 0, 0, L"occurred"}, | |
510 {L"oficial", false, 0, 0, L"official"}, | |
511 {L"offen", false, 0, 0, L"often"}, | |
512 {L"ommision", false, 0, 0, L"omission"}, | |
513 {L"oprate", false, 0, 0, L"operate"}, | |
514 {L"oppurtunity", false, 0, 0, L"opportunity"}, | |
515 {L"orignal", false, 0, 0, L"original"}, | |
516 {L"outragous", false, 0, 0, L"outrageous"}, | |
517 {L"parrallel", false, 0, 0, L"parallel"}, | |
518 {L"parliment", false, 0, 0, L"parliament"}, | |
519 {L"particurly", false, 0, 0, L"particularly"}, | |
520 {L"passtime", false, 0, 0, L"pastime"}, | |
521 {L"peculier", false, 0, 0, L"peculiar"}, | |
522 {L"percieve", false, 0, 0, L"perceive"}, | |
523 {L"pernament", false, 0, 0, L"permanent"}, | |
524 {L"perseverence", false, 0, 0, L"perseverance"}, | |
525 {L"personaly", false, 0, 0, L"personally"}, | |
526 {L"personell", false, 0, 0, L"personnel"}, | |
527 {L"persaude", false, 0, 0, L"persuade"}, | |
528 {L"pichure", false, 0, 0, L"picture"}, | |
529 {L"peice", false, 0, 0, L"piece"}, | |
530 {L"plagerize", false, 0, 0, L"plagiarize"}, | |
531 {L"playright", false, 0, 0, L"playwright"}, | |
532 {L"plesant", false, 0, 0, L"pleasant"}, | |
533 {L"pollitical", false, 0, 0, L"political"}, | |
534 {L"posession", false, 0, 0, L"possession"}, | |
535 {L"potatos", false, 0, 0, L"potatoes"}, | |
536 {L"practicle", false, 0, 0, L"practical"}, | |
537 {L"preceed", false, 0, 0, L"precede"}, | |
538 {L"predjudice", false, 0, 0, L"prejudice"}, | |
539 {L"presance", false, 0, 0, L"presence"}, | |
540 {L"privelege", false, 0, 0, L"privilege"}, | |
541 // This one should probably work. It does in FF and Hunspell. | |
542 // {L"probly", false, 0, 0, L"probably"}, | |
543 {L"proffesional", false, 0, 0, L"professional"}, | |
544 {L"professer", false, 0, 0, L"professor"}, | |
545 {L"promiss", false, 0, 0, L"promise"}, | |
546 {L"pronounciation", false, 0, 0, L"pronunciation"}, | |
547 {L"prufe", false, 0, 0, L"proof"}, | |
548 {L"psycology", false, 0, 0, L"psychology"}, | |
549 {L"publically", false, 0, 0, L"publicly"}, | |
550 {L"quanity", false, 0, 0, L"quantity"}, | |
551 {L"quarentine", false, 0, 0, L"quarantine"}, | |
552 {L"questionaire", false, 0, 0, L"questionnaire"}, | |
553 {L"readible", false, 0, 0, L"readable"}, | |
554 {L"realy", false, 0, 0, L"really"}, | |
555 {L"recieve", false, 0, 0, L"receive"}, | |
556 {L"reciept", false, 0, 0, L"receipt"}, | |
557 {L"reconize", false, 0, 0, L"recognize"}, | |
558 {L"recomend", false, 0, 0, L"recommend"}, | |
559 {L"refered", false, 0, 0, L"referred"}, | |
560 {L"referance", false, 0, 0, L"reference"}, | |
561 {L"relevent", false, 0, 0, L"relevant"}, | |
562 {L"religous", false, 0, 0, L"religious"}, | |
563 {L"repitition", false, 0, 0, L"repetition"}, | |
564 {L"restarant", false, 0, 0, L"restaurant"}, | |
565 {L"rythm", false, 0, 0, L"rhythm"}, | |
566 {L"rediculous", false, 0, 0, L"ridiculous"}, | |
567 {L"sacrefice", false, 0, 0, L"sacrifice"}, | |
568 {L"saftey", false, 0, 0, L"safety"}, | |
569 {L"sissors", false, 0, 0, L"scissors"}, | |
570 {L"secratary", false, 0, 0, L"secretary"}, | |
571 {L"sieze", false, 0, 0, L"seize"}, | |
572 {L"seperate", false, 0, 0, L"separate"}, | |
573 {L"sargent", false, 0, 0, L"sergeant"}, | |
574 {L"shineing", false, 0, 0, L"shining"}, | |
575 {L"similer", false, 0, 0, L"similar"}, | |
576 {L"sinceerly", false, 0, 0, L"sincerely"}, | |
577 {L"speach", false, 0, 0, L"speech"}, | |
578 {L"stoping", false, 0, 0, L"stopping"}, | |
579 {L"strenght", false, 0, 0, L"strength"}, | |
580 {L"succede", false, 0, 0, L"succeed"}, | |
581 {L"succesful", false, 0, 0, L"successful"}, | |
582 {L"supercede", false, 0, 0, L"supersede"}, | |
583 {L"surelly", false, 0, 0, L"surely"}, | |
584 {L"suprise", false, 0, 0, L"surprise"}, | |
585 {L"temperture", false, 0, 0, L"temperature"}, | |
586 {L"temprary", false, 0, 0, L"temporary"}, | |
587 {L"tomatos", false, 0, 0, L"tomatoes"}, | |
588 {L"tommorrow", false, 0, 0, L"tomorrow"}, | |
589 {L"tounge", false, 0, 0, L"tongue"}, | |
590 {L"truely", false, 0, 0, L"truly"}, | |
591 {L"twelth", false, 0, 0, L"twelfth"}, | |
592 {L"tyrany", false, 0, 0, L"tyranny"}, | |
593 {L"underate", false, 0, 0, L"underrate"}, | |
594 {L"untill", false, 0, 0, L"until"}, | |
595 {L"unuseual", false, 0, 0, L"unusual"}, | |
596 {L"upholstry", false, 0, 0, L"upholstery"}, | |
597 {L"usible", false, 0, 0, L"usable"}, | |
598 {L"useing", false, 0, 0, L"using"}, | |
599 {L"usualy", false, 0, 0, L"usually"}, | |
600 {L"vaccuum", false, 0, 0, L"vacuum"}, | |
601 {L"vegatarian", false, 0, 0, L"vegetarian"}, | |
602 {L"vehical", false, 0, 0, L"vehicle"}, | |
603 {L"visious", false, 0, 0, L"vicious"}, | |
604 {L"villege", false, 0, 0, L"village"}, | |
605 {L"wierd", false, 0, 0, L"weird"}, | |
606 {L"wellcome", false, 0, 0, L"welcome"}, | |
607 {L"wellfare", false, 0, 0, L"welfare"}, | |
608 {L"wilfull", false, 0, 0, L"willful"}, | |
609 {L"withold", false, 0, 0, L"withhold"}, | |
610 {L"writting", false, 0, 0, L"writing"}, | |
611 #else | |
612 {L"ello", false, 0, 0, L"hello"}, | |
613 {L"ello", false, 0, 0, L"cello"}, | |
614 {L"wate", false, 0, 0, L"water"}, | |
615 {L"wate", false, 0, 0, L"waste"}, | |
616 {L"wate", false, 0, 0, L"sate"}, | |
617 {L"wate", false, 0, 0, L"ate"}, | |
618 {L"jum", false, 0, 0, L"jump"}, | |
619 {L"jum", false, 0, 0, L"hum"}, | |
620 {L"jum", false, 0, 0, L"sum"}, | |
621 {L"jum", false, 0, 0, L"um"}, | |
622 #endif // !OS_MACOSX | |
623 // TODO (Sidchat): add many more examples. | |
624 }; | |
625 | |
626 FilePath hunspell_directory = GetHunspellDirectory(); | |
627 ASSERT_FALSE(hunspell_directory.empty()); | |
628 | |
629 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
630 hunspell_directory, "en-US", NULL, FilePath())); | |
631 spell_checker->Initialize(); | |
632 message_loop_.RunAllPending(); | |
633 | |
634 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
635 std::vector<string16> suggestions; | |
636 size_t input_length = 0; | |
637 if (kTestCases[i].input != NULL) { | |
638 input_length = wcslen(kTestCases[i].input); | |
639 } | |
640 int misspelling_start; | |
641 int misspelling_length; | |
642 bool result = spell_checker->SpellCheckWord( | |
643 WideToUTF16(kTestCases[i].input).c_str(), | |
644 static_cast<int>(input_length), | |
645 0, | |
646 &misspelling_start, | |
647 &misspelling_length, | |
648 &suggestions); | |
649 | |
650 // Check for spelling. | |
651 EXPECT_EQ(kTestCases[i].expected_result, result); | |
652 | |
653 // Check if the suggested words occur. | |
654 bool suggested_word_is_present = false; | |
655 for (int j=0; j < static_cast<int>(suggestions.size()); j++) { | |
656 if (suggestions.at(j).compare(WideToUTF16(kTestCases[i].suggested_word)) | |
657 == 0) { | |
658 suggested_word_is_present = true; | |
659 break; | |
660 } | |
661 } | |
662 | |
663 EXPECT_TRUE(suggested_word_is_present); | |
664 } | |
665 } | |
666 | |
667 // This test verifies our spellchecker can split a text into words and check | |
668 // the spelling of each word in the text. | |
669 TEST_F(SpellCheckTest, SpellCheckText) { | |
670 static const struct { | |
671 const char* language; | |
672 const wchar_t* input; | |
673 } kTestCases[] = { | |
674 { | |
675 // Catalan | |
676 "ca-ES", | |
677 L"La missi\x00F3 de Google \x00E9s organitzar la informaci\x00F3 " | |
678 L"del m\x00F3n i fer que sigui \x00FAtil i accessible universalment." | |
679 }, { | |
680 // Czech | |
681 "cs-CZ", | |
682 L"Posl\x00E1n\x00EDm spole\x010Dnosti Google je " | |
683 L"uspo\x0159\x00E1\x0064\x0061t informace z cel\x00E9ho sv\x011Bta " | |
684 L"tak, aby byly v\x0161\x0065obecn\x011B p\x0159\x00EDstupn\x00E9 " | |
685 L"a u\x017Eite\x010Dn\x00E9." | |
686 }, { | |
687 // Danish | |
688 "da-DK", | |
689 L"Googles " | |
690 L"mission er at organisere verdens information og g\x00F8re den " | |
691 L"almindeligt tilg\x00E6ngelig og nyttig." | |
692 }, { | |
693 // German | |
694 "de-DE", | |
695 L"Das Ziel von Google besteht darin, die auf der Welt vorhandenen " | |
696 L"Informationen zu organisieren und allgemein zug\x00E4nglich und " | |
697 L"nutzbar zu machen." | |
698 }, { | |
699 // Greek | |
700 "el-GR", | |
701 L"\x0391\x03C0\x03BF\x03C3\x03C4\x03BF\x03BB\x03AE " | |
702 L"\x03C4\x03B7\x03C2 Google \x03B5\x03AF\x03BD\x03B1\x03B9 " | |
703 L"\x03BD\x03B1 \x03BF\x03C1\x03B3\x03B1\x03BD\x03CE\x03BD\x03B5\x03B9 " | |
704 L"\x03C4\x03B9\x03C2 " | |
705 L"\x03C0\x03BB\x03B7\x03C1\x03BF\x03C6\x03BF\x03C1\x03AF\x03B5\x03C2 " | |
706 L"\x03C4\x03BF\x03C5 \x03BA\x03CC\x03C3\x03BC\x03BF\x03C5 " | |
707 L"\x03BA\x03B1\x03B9 \x03BD\x03B1 \x03C4\x03B9\x03C2 " | |
708 L"\x03BA\x03B1\x03B8\x03B9\x03C3\x03C4\x03AC " | |
709 L"\x03C0\x03C1\x03BF\x03C3\x03B2\x03AC\x03C3\x03B9\x03BC\x03B5\x03C2 " | |
710 L"\x03BA\x03B1\x03B9 \x03C7\x03C1\x03AE\x03C3\x03B9\x03BC\x03B5\x03C2." | |
711 }, { | |
712 // English (Australia) | |
713 "en-AU", | |
714 // L"Google's " - to be added. | |
715 L"mission is to organise the world's information and make it " | |
716 L"universally accessible and useful." | |
717 }, { | |
718 // English (United Kingdom) | |
719 "en-GB", | |
720 // L"Google's " - to be added. | |
721 L"mission is to organise the world's information and make it " | |
722 L"universally accessible and useful." | |
723 }, { | |
724 // English (United States) | |
725 "en-US", | |
726 L"Google's mission is to organize the world's information and make it " | |
727 L"universally accessible and useful." | |
728 }, { | |
729 // Spanish | |
730 "es-ES", | |
731 L"La misi\x00F3n de " | |
732 // L"Google" - to be added. | |
733 L" es organizar la informaci\x00F3n mundial " | |
734 L"para que resulte universalmente accesible y \x00FAtil." | |
735 }, { | |
736 // Estonian | |
737 "et-EE", | |
738 // L"Google'ile " - to be added. | |
739 L"\x00FClesanne on korraldada maailma teavet ja teeb selle " | |
740 L"k\x00F5igile k\x00E4ttesaadavaks ja kasulikuks.", | |
741 }, { | |
742 // French | |
743 "fr-FR", | |
744 L"Google a pour mission d'organiser les informations \x00E0 " | |
745 L"l'\x00E9\x0063helle mondiale dans le but de les rendre accessibles " | |
746 L"et utiles \x00E0 tous." | |
747 }, { | |
748 // Hebrew | |
749 "he-IL", | |
750 L"\x05D4\x05DE\x05E9\x05D9\x05DE\x05D4 \x05E9\x05DC Google " | |
751 L"\x05D4\x05D9\x05D0 \x05DC\x05D0\x05E8\x05D2\x05DF " | |
752 L"\x05D0\x05EA \x05D4\x05DE\x05D9\x05D3\x05E2 " | |
753 L"\x05D4\x05E2\x05D5\x05DC\x05DE\x05D9 " | |
754 L"\x05D5\x05DC\x05D4\x05E4\x05D5\x05DA \x05D0\x05D5\x05EA\x05D5 " | |
755 L"\x05DC\x05D6\x05DE\x05D9\x05DF " | |
756 L"\x05D5\x05E9\x05D9\x05DE\x05D5\x05E9\x05D9 \x05D1\x05DB\x05DC " | |
757 L"\x05D4\x05E2\x05D5\x05DC\x05DD." | |
758 }, { | |
759 // Hindi | |
760 "hi-IN", | |
761 L"Google \x0915\x093E \x092E\x093F\x0936\x0928 " | |
762 L"\x0926\x0941\x0928\x093F\x092F\x093E \x0915\x0940 " | |
763 L"\x091C\x093E\x0928\x0915\x093E\x0930\x0940 \x0915\x094B " | |
764 L"\x0935\x094D\x092F\x0935\x0938\x094D\x0925\x093F\x0924 " | |
765 L"\x0915\x0930\x0928\x093E \x0914\x0930 \x0909\x0938\x0947 " | |
766 L"\x0938\x093E\x0930\x094D\x0935\x092D\x094C\x092E\x093F\x0915 " | |
767 L"\x0930\x0942\x092A \x0938\x0947 \x092A\x0939\x0941\x0901\x091A " | |
768 L"\x092E\x0947\x0902 \x0914\x0930 \x0909\x092A\x092F\x094B\x0917\x0940 " | |
769 L"\x092C\x0928\x093E\x0928\x093E \x0939\x0948." | |
770 }, { | |
771 // Croatian | |
772 "hr-HR", | |
773 // L"Googleova " - to be added. | |
774 L"je misija organizirati svjetske informacije i u\x010Diniti ih " | |
775 // L"univerzalno " - to be added. | |
776 L"pristupa\x010Dnima i korisnima." | |
777 }, { | |
778 // Indonesian | |
779 "id-ID", | |
780 L"Misi Google adalah untuk mengelola informasi dunia dan membuatnya " | |
781 L"dapat diakses dan bermanfaat secara universal." | |
782 }, { | |
783 // Italian | |
784 "it-IT", | |
785 L"La missione di Google \x00E8 organizzare le informazioni a livello " | |
786 L"mondiale e renderle universalmente accessibili e fruibili." | |
787 }, { | |
788 // Lithuanian | |
789 "lt-LT", | |
790 L"\x201EGoogle\x201C tikslas \x2013 rinkti ir sisteminti pasaulio " | |
791 L"informacij\x0105 bei padaryti j\x0105 prieinam\x0105 ir " | |
792 L"nauding\x0105 visiems." | |
793 }, { | |
794 // Latvian | |
795 "lv-LV", | |
796 L"Google uzdevums ir k\x0101rtot pasaules inform\x0101" | |
797 L"ciju un padar\x012Bt to univers\x0101li pieejamu un noder\x012Bgu." | |
798 }, { | |
799 // Norwegian | |
800 "nb-NO", | |
801 // L"Googles " - to be added. | |
802 L"m\x00E5l er \x00E5 organisere informasjonen i verden og " | |
803 L"gj\x00F8re den tilgjengelig og nyttig for alle." | |
804 }, { | |
805 // Dutch | |
806 "nl-NL", | |
807 L"Het doel van Google is om alle informatie wereldwijd toegankelijk " | |
808 L"en bruikbaar te maken." | |
809 }, { | |
810 // Polish | |
811 "pl-PL", | |
812 L"Misj\x0105 Google jest uporz\x0105" L"dkowanie \x015Bwiatowych " | |
813 L"zasob\x00F3w informacji, aby sta\x0142y si\x0119 one powszechnie " | |
814 L"dost\x0119pne i u\x017Cyteczne." | |
815 }, { | |
816 // Portuguese (Brazil) | |
817 "pt-BR", | |
818 L"A miss\x00E3o do " | |
819 #if !defined(OS_MACOSX) | |
820 L"Google " | |
821 #endif | |
822 L"\x00E9 organizar as informa\x00E7\x00F5" | |
823 L"es do mundo todo e " | |
824 #if !defined(OS_MACOSX) | |
825 L"torn\x00E1-las " | |
826 #endif | |
827 L"acess\x00EDveis e " | |
828 // L"\x00FAteis " - to be added. | |
829 L"em car\x00E1ter universal." | |
830 }, { | |
831 // Portuguese (Portugal) | |
832 "pt-PT", | |
833 L"O " | |
834 #if !defined(OS_MACOSX) | |
835 L"Google " | |
836 #endif | |
837 L"tem por miss\x00E3o organizar a informa\x00E7\x00E3o do " | |
838 L"mundo e " | |
839 #if !defined(OS_MACOSX) | |
840 L"torn\x00E1-la " | |
841 #endif | |
842 L"universalmente acess\x00EDvel e \x00FAtil" | |
843 }, { | |
844 // Romanian | |
845 "ro-RO", | |
846 L"Misiunea Google este de " | |
847 // L"a " - to be added. | |
848 L"organiza informa\x0163iile lumii \x015Fi de " | |
849 // L"a " - to be added. | |
850 L"le face accesibile \x015Fi utile la nivel universal." | |
851 }, { | |
852 // Russian | |
853 "ru-RU", | |
854 L"\x041C\x0438\x0441\x0441\x0438\x044F Google " | |
855 L"\x0441\x043E\x0441\x0442\x043E\x0438\x0442 \x0432 " | |
856 L"\x043E\x0440\x0433\x0430\x043D\x0438\x0437\x0430\x0446\x0438\x0438 " | |
857 L"\x043C\x0438\x0440\x043E\x0432\x043E\x0439 " | |
858 L"\x0438\x043D\x0444\x043E\x0440\x043C\x0430\x0446\x0438\x0438, " | |
859 L"\x043E\x0431\x0435\x0441\x043F\x0435\x0447\x0435\x043D\x0438\x0438 " | |
860 L"\x0435\x0435 " | |
861 L"\x0434\x043E\x0441\x0442\x0443\x043F\x043D\x043E\x0441\x0442\x0438 " | |
862 L"\x0438 \x043F\x043E\x043B\x044C\x0437\x044B \x0434\x043B\x044F " | |
863 L"\x0432\x0441\x0435\x0445." | |
864 }, { | |
865 // Slovak | |
866 "sk-SK", | |
867 L"Spolo\x010Dnos\x0165 Google si dala za \x00FAlohu usporiada\x0165 " | |
868 L"inform\x00E1\x0063ie " | |
869 L"z cel\x00E9ho sveta a zabezpe\x010Di\x0165, " | |
870 L"aby boli v\x0161eobecne dostupn\x00E9 a u\x017Eito\x010Dn\x00E9." | |
871 }, { | |
872 // Slovenian | |
873 "sl-SI", | |
874 // L"Googlovo " - to be added. | |
875 L"poslanstvo je organizirati svetovne informacije in " | |
876 L"omogo\x010Diti njihovo dostopnost in s tem uporabnost za vse." | |
877 }, { | |
878 // Swedish | |
879 "sv-SE", | |
880 L"Googles m\x00E5ls\x00E4ttning \x00E4r att ordna v\x00E4rldens " | |
881 L"samlade information och g\x00F6ra den tillg\x00E4nglig f\x00F6r alla." | |
882 }, { | |
883 // Turkish | |
884 "tr-TR", | |
885 // L"Google\x2019\x0131n " - to be added. | |
886 L"misyonu, d\x00FCnyadaki t\x00FCm bilgileri " | |
887 L"organize etmek ve evrensel olarak eri\x015Filebilir ve " | |
888 L"kullan\x0131\x015Fl\x0131 k\x0131lmakt\x0131r." | |
889 }, { | |
890 // Vietnamese | |
891 "vi-VN", | |
892 L"Nhi\x1EC7m v\x1EE5 c\x1EE7\x0061 " | |
893 L"Google la \x0111\x1EC3 t\x1ED5 ch\x1EE9\x0063 " | |
894 L"c\x00E1\x0063 th\x00F4ng tin c\x1EE7\x0061 " | |
895 L"th\x1EBF gi\x1EDBi va l\x00E0m cho n\x00F3 universal c\x00F3 " | |
896 L"th\x1EC3 truy c\x1EADp va h\x1EEFu d\x1EE5ng h\x01A1n." | |
897 }, | |
898 }; | |
899 | |
900 FilePath hunspell_directory = GetHunspellDirectory(); | |
901 ASSERT_FALSE(hunspell_directory.empty()); | |
902 | |
903 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
904 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
905 hunspell_directory, kTestCases[i].language, NULL, FilePath())); | |
906 spell_checker->Initialize(); | |
907 message_loop_.RunAllPending(); | |
908 | |
909 size_t input_length = 0; | |
910 if (kTestCases[i].input != NULL) | |
911 input_length = wcslen(kTestCases[i].input); | |
912 | |
913 int misspelling_start = 0; | |
914 int misspelling_length = 0; | |
915 bool result = spell_checker->SpellCheckWord( | |
916 WideToUTF16(kTestCases[i].input).c_str(), | |
917 static_cast<int>(input_length), | |
918 0, | |
919 &misspelling_start, | |
920 &misspelling_length, NULL); | |
921 | |
922 EXPECT_EQ(true, result) << kTestCases[i].language; | |
923 EXPECT_EQ(0, misspelling_start); | |
924 EXPECT_EQ(0, misspelling_length); | |
925 } | |
926 } | |
927 | |
928 // This test Adds words to the SpellChecker and veifies that it remembers them. | |
929 TEST_F(SpellCheckTest, DISABLED_SpellCheckAddToDictionary_EN_US) { | |
930 static const struct { | |
931 // A string to be added to SpellChecker. | |
932 const wchar_t* word_to_add; | |
933 } kTestCases[] = { // Words to be added to the SpellChecker. | |
934 {L"Googley"}, | |
935 {L"Googleplex"}, | |
936 {L"Googler"}, | |
937 }; | |
938 | |
939 FilePath custom_dictionary_file(kTempCustomDictionaryFile); | |
940 FilePath hunspell_directory = GetHunspellDirectory(); | |
941 ASSERT_FALSE(hunspell_directory.empty()); | |
942 | |
943 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
944 hunspell_directory, "en-US", NULL, custom_dictionary_file)); | |
945 spell_checker->Initialize(); | |
946 message_loop_.RunAllPending(); | |
947 | |
948 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
949 // Add the word to spellchecker. | |
950 spell_checker->AddWord(WideToUTF16(kTestCases[i].word_to_add)); | |
951 | |
952 // Now check whether it is added to Spellchecker. | |
953 std::vector<string16> suggestions; | |
954 size_t input_length = 0; | |
955 if (kTestCases[i].word_to_add != NULL) { | |
956 input_length = wcslen(kTestCases[i].word_to_add); | |
957 } | |
958 int misspelling_start; | |
959 int misspelling_length; | |
960 bool result = spell_checker->SpellCheckWord( | |
961 WideToUTF16(kTestCases[i].word_to_add).c_str(), | |
962 static_cast<int>(input_length), | |
963 0, | |
964 &misspelling_start, | |
965 &misspelling_length, | |
966 &suggestions); | |
967 | |
968 // Check for spelling. | |
969 EXPECT_TRUE(result); | |
970 } | |
971 | |
972 // Now initialize another spellchecker to see that AddToWord is permanent. | |
973 scoped_refptr<SpellChecker> spell_checker_new(new SpellChecker( | |
974 hunspell_directory, "en-US", NULL, custom_dictionary_file)); | |
975 spell_checker->Initialize(); | |
976 message_loop_.RunAllPending(); | |
977 | |
978 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
979 // Now check whether it is added to Spellchecker. | |
980 std::vector<string16> suggestions; | |
981 size_t input_length = 0; | |
982 if (kTestCases[i].word_to_add != NULL) { | |
983 input_length = wcslen(kTestCases[i].word_to_add); | |
984 } | |
985 int misspelling_start; | |
986 int misspelling_length; | |
987 bool result = spell_checker_new->SpellCheckWord( | |
988 WideToUTF16(kTestCases[i].word_to_add).c_str(), | |
989 static_cast<int>(input_length), | |
990 0, | |
991 &misspelling_start, | |
992 &misspelling_length, | |
993 &suggestions); | |
994 | |
995 // Check for spelling. | |
996 EXPECT_TRUE(result); | |
997 } | |
998 | |
999 // Remove the temp custom dictionary file. | |
1000 file_util::Delete(custom_dictionary_file, false); | |
1001 } | |
1002 | |
1003 // SpellChecker should suggest custome words for misspelled words. | |
1004 TEST_F(SpellCheckTest, DISABLED_SpellCheckSuggestionsAddToDictionary_EN_US) { | |
1005 static const struct { | |
1006 // A string to be added to SpellChecker. | |
1007 const wchar_t* word_to_add; | |
1008 } kTestCases[] = { // word to be added to SpellChecker | |
1009 {L"Googley"}, | |
1010 {L"Googleplex"}, | |
1011 {L"Googler"}, | |
1012 }; | |
1013 | |
1014 FilePath custom_dictionary_file(kTempCustomDictionaryFile); | |
1015 FilePath hunspell_directory = GetHunspellDirectory(); | |
1016 ASSERT_FALSE(hunspell_directory.empty()); | |
1017 | |
1018 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
1019 hunspell_directory, "en-US", NULL, custom_dictionary_file)); | |
1020 spell_checker->Initialize(); | |
1021 message_loop_.RunAllPending(); | |
1022 | |
1023 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
1024 // Add the word to spellchecker. | |
1025 spell_checker->AddWord(WideToUTF16(kTestCases[i].word_to_add)); | |
1026 } | |
1027 | |
1028 // Now check to see whether the custom words are suggested for | |
1029 // misspelled but similar words. | |
1030 static const struct { | |
1031 // A string to be tested. | |
1032 const wchar_t* input; | |
1033 // An expected result for this test case. | |
1034 // * true: the input string does not have any invalid words. | |
1035 // * false: the input string has one or more invalid words. | |
1036 bool expected_result; | |
1037 // The position and the length of the first invalid word. | |
1038 int misspelling_start; | |
1039 int misspelling_length; | |
1040 | |
1041 // A suggested word that should occur. | |
1042 const wchar_t* suggested_word; | |
1043 } kTestCasesToBeTested[] = { | |
1044 {L"oogley", false, 0, 0, L"Googley"}, | |
1045 {L"oogler", false, 0, 0, L"Googler"}, | |
1046 {L"oogleplex", false, 0, 0, L"Googleplex"}, | |
1047 }; | |
1048 | |
1049 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCasesToBeTested); ++i) { | |
1050 std::vector<string16> suggestions; | |
1051 size_t input_length = 0; | |
1052 if (kTestCasesToBeTested[i].input != NULL) { | |
1053 input_length = wcslen(kTestCasesToBeTested[i].input); | |
1054 } | |
1055 int misspelling_start; | |
1056 int misspelling_length; | |
1057 bool result = spell_checker->SpellCheckWord( | |
1058 WideToUTF16(kTestCasesToBeTested[i].input).c_str(), | |
1059 static_cast<int>(input_length), | |
1060 0, | |
1061 &misspelling_start, | |
1062 &misspelling_length, | |
1063 &suggestions); | |
1064 | |
1065 // Check for spelling. | |
1066 EXPECT_EQ(result, kTestCasesToBeTested[i].expected_result); | |
1067 | |
1068 // Check if the suggested words occur. | |
1069 bool suggested_word_is_present = false; | |
1070 for (int j=0; j < static_cast<int>(suggestions.size()); j++) { | |
1071 if (suggestions.at(j).compare( | |
1072 WideToUTF16(kTestCasesToBeTested[i].suggested_word)) == | |
1073 0) { | |
1074 suggested_word_is_present = true; | |
1075 break; | |
1076 } | |
1077 } | |
1078 | |
1079 EXPECT_TRUE(suggested_word_is_present); | |
1080 } | |
1081 | |
1082 // Remove the temp custom dictionary file. | |
1083 file_util::Delete(custom_dictionary_file, false); | |
1084 } | |
1085 | |
1086 TEST_F(SpellCheckTest, GetAutoCorrectionWord_EN_US) { | |
1087 static const struct { | |
1088 // A misspelled word. | |
1089 const char* input; | |
1090 | |
1091 // An expected result for this test case. | |
1092 // Should be an empty string if there are no suggestions for auto correct. | |
1093 const char* expected_result; | |
1094 } kTestCases[] = { | |
1095 {"teh", "the"}, | |
1096 {"moer", "more"}, | |
1097 {"watre", "water"}, | |
1098 {"noen", ""}, | |
1099 {"what", ""}, | |
1100 }; | |
1101 | |
1102 FilePath hunspell_directory = GetHunspellDirectory(); | |
1103 ASSERT_FALSE(hunspell_directory.empty()); | |
1104 | |
1105 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
1106 hunspell_directory, "en-US", NULL, FilePath())); | |
1107 spell_checker->EnableAutoSpellCorrect(true); | |
1108 spell_checker->Initialize(); | |
1109 message_loop_.RunAllPending(); | |
1110 | |
1111 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
1112 string16 misspelled_word(UTF8ToUTF16(kTestCases[i].input)); | |
1113 string16 expected_autocorrect_word( | |
1114 UTF8ToUTF16(kTestCases[i].expected_result)); | |
1115 string16 autocorrect_word = spell_checker->GetAutoCorrectionWord( | |
1116 misspelled_word, 0); | |
1117 | |
1118 // Check for spelling. | |
1119 EXPECT_EQ(expected_autocorrect_word, autocorrect_word); | |
1120 } | |
1121 } | |
1122 | |
1123 #if defined(OS_MACOSX) | |
1124 // Tests that words are properly ignored. Currently only enabled on OS X as it | |
1125 // is the only platform to support ignoring words. Note that in this test, we | |
1126 // supply a non-zero doc_tag, in order to test that ignored words are matched to | |
1127 // the correct document. | |
1128 TEST_F(SpellCheckTest, IgnoreWords_EN_US) { | |
1129 static const struct { | |
1130 // A misspelled word. | |
1131 const char* input; | |
1132 bool input_result; | |
1133 } kTestCases[] = { | |
1134 {"teh", false}, | |
1135 {"moer", false}, | |
1136 {"watre", false}, | |
1137 {"noen", false}, | |
1138 }; | |
1139 | |
1140 FilePath hunspell_directory = GetHunspellDirectory(); | |
1141 ASSERT_FALSE(hunspell_directory.empty()); | |
1142 | |
1143 scoped_refptr<SpellChecker> spell_checker(new SpellChecker( | |
1144 hunspell_directory, "en-US", NULL, FilePath())); | |
1145 spell_checker->Initialize(); | |
1146 message_loop_.RunAllPending(); | |
1147 | |
1148 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { | |
1149 string16 word(UTF8ToUTF16(kTestCases[i].input)); | |
1150 std::vector<string16> suggestions; | |
1151 size_t input_length = 0; | |
1152 if (kTestCases[i].input != NULL) { | |
1153 input_length = word.length(); | |
1154 } | |
1155 int misspelling_start; | |
1156 int misspelling_length; | |
1157 | |
1158 int doc_tag = SpellCheckerPlatform::GetDocumentTag(); | |
1159 bool result = spell_checker->SpellCheckWord(word.c_str(), | |
1160 static_cast<int>(input_length), | |
1161 doc_tag, | |
1162 &misspelling_start, | |
1163 &misspelling_length, | |
1164 &suggestions); | |
1165 | |
1166 // The word should show up as misspelled. | |
1167 EXPECT_EQ(kTestCases[i].input_result, result); | |
1168 | |
1169 // Ignore the word. | |
1170 SpellCheckerPlatform::IgnoreWord(word); | |
1171 | |
1172 // Spellcheck again. | |
1173 result = spell_checker->SpellCheckWord(word.c_str(), | |
1174 static_cast<int>(input_length), | |
1175 doc_tag, | |
1176 &misspelling_start, | |
1177 &misspelling_length, | |
1178 &suggestions); | |
1179 | |
1180 // The word should now show up as correctly spelled. | |
1181 EXPECT_EQ(!(kTestCases[i].input_result), result); | |
1182 | |
1183 // Close the docuemnt. Any words that we had previously ignored should no | |
1184 // longer be ignored and thus should show up as misspelled. | |
1185 SpellCheckerPlatform::CloseDocumentWithTag(doc_tag); | |
1186 | |
1187 // Spellcheck one more time. | |
1188 result = spell_checker->SpellCheckWord(word.c_str(), | |
1189 static_cast<int>(input_length), | |
1190 doc_tag, | |
1191 &misspelling_start, | |
1192 &misspelling_length, | |
1193 &suggestions); | |
1194 | |
1195 // The word should now show be spelled wrong again | |
1196 EXPECT_EQ(kTestCases[i].input_result, result); | |
1197 } | |
1198 } // Test IgnoreWords_EN_US | |
1199 #endif // OS_MACOSX | |
OLD | NEW |