OLD | NEW |
1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/spellchecker.h" | 5 #include "chrome/renderer/spellchecker/spellcheck.h" |
6 | 6 |
7 #include "app/l10n_util.h" | |
8 #include "base/basictypes.h" | |
9 #include "base/compiler_specific.h" | |
10 #include "base/file_util.h" | 7 #include "base/file_util.h" |
11 #include "base/histogram.h" | 8 #include "base/histogram.h" |
12 #include "base/logging.h" | 9 #include "base/time.h" |
13 #include "base/path_service.h" | 10 #include "chrome/renderer/render_thread.h" |
14 #include "base/stats_counters.h" | |
15 #include "base/string_util.h" | |
16 #include "base/thread.h" | |
17 #include "chrome/browser/browser_process.h" | |
18 #include "chrome/browser/chrome_thread.h" | |
19 #include "chrome/browser/net/url_fetcher.h" | |
20 #include "chrome/browser/profile.h" | |
21 #include "chrome/browser/spellchecker_common.h" | |
22 #include "chrome/browser/spellchecker_platform_engine.h" | |
23 #include "chrome/common/chrome_constants.h" | |
24 #include "chrome/common/chrome_counters.h" | |
25 #include "chrome/common/chrome_paths.h" | |
26 #include "chrome/common/pref_names.h" | |
27 #include "chrome/common/pref_service.h" | |
28 #include "grit/generated_resources.h" | |
29 #include "grit/locale_settings.h" | |
30 #include "net/url_request/url_request.h" | |
31 #include "third_party/hunspell/src/hunspell/hunspell.hxx" | 11 #include "third_party/hunspell/src/hunspell/hunspell.hxx" |
32 | 12 |
| 13 static const int kMaxAutoCorrectWordSize = 8; |
| 14 static const int kMaxSuggestions = 5; |
| 15 |
33 using base::TimeTicks; | 16 using base::TimeTicks; |
34 | 17 |
35 namespace { | 18 SpellCheck::SpellCheck() |
36 | 19 : auto_spell_correct_turned_on_(false), |
37 static const struct { | 20 // TODO(estade): initialize this properly. |
38 // The language. | 21 is_using_platform_spelling_engine_(false), |
39 const char* language; | 22 initialized_(false) { |
40 | 23 // Wait till we check the first word before doing any initializing. |
41 // The corresponding language and region, used by the dictionaries. | |
42 const char* language_region; | |
43 } g_supported_spellchecker_languages[] = { | |
44 {"en-US", "en-US"}, | |
45 {"en-GB", "en-GB"}, | |
46 {"en-AU", "en-AU"}, | |
47 {"fr", "fr-FR"}, | |
48 {"it", "it-IT"}, | |
49 {"de", "de-DE"}, | |
50 {"es", "es-ES"}, | |
51 {"nl", "nl-NL"}, | |
52 {"pt-BR", "pt-BR"}, | |
53 {"ru", "ru-RU"}, | |
54 {"pl", "pl-PL"}, | |
55 // {"th", "th-TH"}, // Not to be included in Spellchecker as per B=1277824 | |
56 {"sv", "sv-SE"}, | |
57 {"da", "da-DK"}, | |
58 {"pt-PT", "pt-PT"}, | |
59 {"ro", "ro-RO"}, | |
60 // {"hu", "hu-HU"}, // Not to be included in Spellchecker as per B=1277824 | |
61 {"he", "he-IL"}, | |
62 {"id", "id-ID"}, | |
63 {"cs", "cs-CZ"}, | |
64 {"el", "el-GR"}, | |
65 {"nb", "nb-NO"}, | |
66 {"vi", "vi-VN"}, | |
67 // {"bg", "bg-BG"}, // Not to be included in Spellchecker as per B=1277824 | |
68 {"hr", "hr-HR"}, | |
69 {"lt", "lt-LT"}, | |
70 {"sk", "sk-SK"}, | |
71 {"sl", "sl-SI"}, | |
72 {"ca", "ca-ES"}, | |
73 {"lv", "lv-LV"}, | |
74 // {"uk", "uk-UA"}, // Not to be included in Spellchecker as per B=1277824 | |
75 {"hi", "hi-IN"}, | |
76 {"et", "et-EE"}, | |
77 {"tr", "tr-TR"}, | |
78 }; | |
79 | |
80 // Get the fallback folder (currently chrome::DIR_USER_DATA) where the | |
81 // dictionary is downloaded in case of system-wide installations. | |
82 FilePath GetFallbackDictionaryDownloadDirectory() { | |
83 FilePath dict_dir_userdata; | |
84 PathService::Get(chrome::DIR_USER_DATA, &dict_dir_userdata); | |
85 dict_dir_userdata = dict_dir_userdata.AppendASCII("Dictionaries"); | |
86 return dict_dir_userdata; | |
87 } | 24 } |
88 | 25 |
89 bool SaveBufferToFile(const std::string& data, | 26 SpellCheck::~SpellCheck() { |
90 FilePath file_to_write) { | |
91 int num_bytes = data.length(); | |
92 return file_util::WriteFile(file_to_write, data.data(), num_bytes) == | |
93 num_bytes; | |
94 } | 27 } |
95 | 28 |
96 } // namespace | 29 void SpellCheck::Init(const base::FileDescriptor& fd, |
| 30 const std::vector<std::string>& custom_words, |
| 31 const std::string language) { |
| 32 initialized_ = true; |
| 33 hunspell_.reset(); |
| 34 bdict_file_.reset(); |
| 35 fd_ = fd; |
| 36 character_attributes_.SetDefaultLanguage(language); |
97 | 37 |
98 // This is a helper class which acts as a proxy for invoking a task from the | 38 custom_words_.insert(custom_words_.end(), |
99 // file loop back to the IO loop. Invoking a task from file loop to the IO | 39 custom_words.begin(), custom_words.end()); |
100 // loop directly is not safe as during browser shutdown, the IO loop tears | 40 |
101 // down before the file loop. To avoid a crash, this object is invoked in the | 41 // We delay the actual initialization of hunspell until it is needed. |
102 // UI loop from the file loop, from where it gets the IO thread directly from | 42 } |
103 // g_browser_process and invokes the given task in the IO loop if it is not | 43 |
104 // NULL. This object also takes ownership of the given task. | 44 bool SpellCheck::SpellCheckWord( |
105 class UIProxyForIOTask : public Task { | 45 const char16* in_word, |
106 public: | 46 int in_word_len, |
107 explicit UIProxyForIOTask(Task* callback_task, SpellChecker* spellchecker) | 47 int tag, |
108 : callback_task_(callback_task), | 48 int* misspelling_start, |
109 spellchecker_(spellchecker) { | 49 int* misspelling_len, |
| 50 std::vector<string16>* optional_suggestions) { |
| 51 DCHECK(in_word_len >= 0); |
| 52 DCHECK(misspelling_start && misspelling_len) << "Out vars must be given."; |
| 53 |
| 54 // Do nothing if we need to delay initialization. (Rather than blocking, |
| 55 // report the word as correctly spelled.) |
| 56 if (InitializeIfNeeded()) |
| 57 return true; |
| 58 |
| 59 // Do nothing if spell checking is disabled. |
| 60 if (initialized_ && fd_.fd == -1) |
| 61 return true; |
| 62 |
| 63 *misspelling_start = 0; |
| 64 *misspelling_len = 0; |
| 65 if (in_word_len == 0) |
| 66 return true; // No input means always spelled correctly. |
| 67 |
| 68 SpellcheckWordIterator word_iterator; |
| 69 string16 word; |
| 70 int word_start; |
| 71 int word_length; |
| 72 word_iterator.Initialize(&character_attributes_, in_word, in_word_len, true); |
| 73 while (word_iterator.GetNextWord(&word, &word_start, &word_length)) { |
| 74 // Found a word (or a contraction) that the spellchecker can check the |
| 75 // spelling of. |
| 76 if (CheckSpelling(word, tag)) |
| 77 continue; |
| 78 |
| 79 // If the given word is a concatenated word of two or more valid words |
| 80 // (e.g. "hello:hello"), we should treat it as a valid word. |
| 81 if (IsValidContraction(word, tag)) |
| 82 continue; |
| 83 |
| 84 *misspelling_start = word_start; |
| 85 *misspelling_len = word_length; |
| 86 |
| 87 // Get the list of suggested words. |
| 88 if (optional_suggestions) |
| 89 FillSuggestionList(word, optional_suggestions); |
| 90 return false; |
110 } | 91 } |
111 | 92 |
112 private: | 93 return true; |
113 void Run(); | |
114 | |
115 Task* callback_task_; | |
116 // The SpellChecker that invoked the file loop task. May be NULL. If not | |
117 // NULL, then we will Release() on it if we don't run |callback_task_|. This | |
118 // balances any refs the spellchecker might have had outstanding which it | |
119 // would have Released() when |callback_task_| was run. | |
120 SpellChecker* spellchecker_; | |
121 DISALLOW_COPY_AND_ASSIGN(UIProxyForIOTask); | |
122 }; | |
123 | |
124 void UIProxyForIOTask::Run() { | |
125 // This has been invoked in the UI thread. | |
126 base::Thread* io_thread = g_browser_process->io_thread(); | |
127 if (io_thread) { // io_thread has not been torn down yet. | |
128 MessageLoop* io_loop = io_thread->message_loop(); | |
129 io_loop->PostTask(FROM_HERE, callback_task_); | |
130 } else { | |
131 if (spellchecker_) | |
132 spellchecker_->Release(); | |
133 delete callback_task_; | |
134 } | |
135 | |
136 callback_task_ = NULL; | |
137 } | 94 } |
138 | 95 |
139 // Design: The spellchecker initializes hunspell_ in the Initialize() method. | 96 string16 SpellCheck::GetAutoCorrectionWord(const string16& word, int tag) { |
140 // This is done using the dictionary file on disk, e.g. "en-US_1_1.bdic". | |
141 // Initialization of hunspell_ is held off during this process. If the | |
142 // dictionary is not available, we first attempt to download and save it. After | |
143 // the dictionary is downloaded and saved to disk (or the attempt to do so | |
144 // fails), corresponding flags are set | |
145 // in spellchecker - in the IO thread. Since IO thread goes first during closing | |
146 // of browser, a proxy task |UIProxyForIOTask| is created in the UI thread, | |
147 // which obtains the IO thread independently and invokes the task in the IO | |
148 // thread if it's not NULL. After the flags are cleared, a (final) attempt is | |
149 // made to initialize hunspell_. If it fails even then (dictionary could not | |
150 // download), no more attempts are made to initialize it. | |
151 class SaveDictionaryTask : public Task { | |
152 public: | |
153 SaveDictionaryTask(Task* on_dictionary_save_complete_callback_task, | |
154 const FilePath& first_attempt_file_name, | |
155 const FilePath& fallback_file_name, | |
156 const std::string& data) | |
157 : on_dictionary_save_complete_callback_task_( | |
158 on_dictionary_save_complete_callback_task), | |
159 first_attempt_file_name_(first_attempt_file_name), | |
160 fallback_file_name_(fallback_file_name), | |
161 data_(data) { | |
162 } | |
163 | |
164 private: | |
165 void Run(); | |
166 | |
167 bool SaveBufferToFile(const std::string& data, | |
168 FilePath file_to_write) { | |
169 int num_bytes = data.length(); | |
170 return file_util::WriteFile(file_to_write, data.data(), num_bytes) == | |
171 num_bytes; | |
172 } | |
173 | |
174 // Task used to call back into the spellchecker on the IO thread on | |
175 // download completion to change the appropriate flags. | |
176 Task* on_dictionary_save_complete_callback_task_; | |
177 | |
178 // The file which will be stored in the first attempt. | |
179 FilePath first_attempt_file_name_; | |
180 | |
181 // The file which will be stored as a fallback. | |
182 FilePath fallback_file_name_; | |
183 | |
184 // The buffer which has to be stored to disk. | |
185 std::string data_; | |
186 | |
187 // This invokes back to io loop when downloading is over. | |
188 DISALLOW_COPY_AND_ASSIGN(SaveDictionaryTask); | |
189 }; | |
190 | |
191 void SaveDictionaryTask::Run() { | |
192 if (!SaveBufferToFile(data_, first_attempt_file_name_)) { | |
193 // Try saving it to |fallback_file_name_|, which almost surely has | |
194 // write permission. If even this fails, there is nothing to be done. | |
195 FilePath fallback_dir = fallback_file_name_.DirName(); | |
196 // Create the directory if it does not exist. | |
197 if (!file_util::PathExists(fallback_dir)) | |
198 file_util::CreateDirectory(fallback_dir); | |
199 SaveBufferToFile(data_, fallback_file_name_); | |
200 } // Unsuccessful save is taken care of in SpellChecker::Initialize(). | |
201 | |
202 // Set Flag that dictionary is not downloading anymore. | |
203 MessageLoop* ui_loop = ChromeThread::GetMessageLoop(ChromeThread::UI); | |
204 ui_loop->PostTask(FROM_HERE, | |
205 new UIProxyForIOTask(on_dictionary_save_complete_callback_task_, NULL)); | |
206 } | |
207 | |
208 // Design: this task tries to read the dictionary from disk and load it into | |
209 // memory. It is executed on the file thread, and posts the results back to | |
210 // the IO thread (via the UI thread---see UIProxyForIOTask). | |
211 // The task first checks for the existence of the dictionary in one of the two | |
212 // given locations. If it does not exist, the task informs the SpellChecker, | |
213 // which will try to download the dictionary and run a new ReadDictionaryTask. | |
214 class ReadDictionaryTask : public Task { | |
215 public: | |
216 ReadDictionaryTask(SpellChecker* spellchecker, | |
217 const FilePath& dict_file_name_app, | |
218 const FilePath& dict_file_name_usr) | |
219 : spellchecker_(spellchecker), | |
220 hunspell_(NULL), | |
221 bdict_file_(NULL), | |
222 custom_dictionary_file_name_( | |
223 spellchecker->custom_dictionary_file_name_), | |
224 dict_file_name_app_(dict_file_name_app), | |
225 dict_file_name_usr_(dict_file_name_usr) { | |
226 } | |
227 | |
228 virtual void Run() { | |
229 FilePath bdict_file_path; | |
230 if (file_util::PathExists(dict_file_name_app_)) { | |
231 bdict_file_path = dict_file_name_app_; | |
232 } else if (file_util::PathExists(dict_file_name_usr_)) { | |
233 bdict_file_path = dict_file_name_usr_; | |
234 } else { | |
235 Finish(false); | |
236 return; | |
237 } | |
238 | |
239 bdict_file_ = new file_util::MemoryMappedFile; | |
240 if (bdict_file_->Initialize(bdict_file_path)) { | |
241 TimeTicks start_time = TimeTicks::Now(); | |
242 | |
243 hunspell_ = | |
244 new Hunspell(bdict_file_->data(), bdict_file_->length()); | |
245 | |
246 // Add custom words to Hunspell. | |
247 std::string contents; | |
248 file_util::ReadFileToString(custom_dictionary_file_name_, &contents); | |
249 std::vector<std::string> list_of_words; | |
250 SplitString(contents, '\n', &list_of_words); | |
251 for (std::vector<std::string>::iterator it = list_of_words.begin(); | |
252 it != list_of_words.end(); ++it) { | |
253 hunspell_->add(it->c_str()); | |
254 } | |
255 | |
256 DHISTOGRAM_TIMES("Spellcheck.InitTime", | |
257 TimeTicks::Now() - start_time); | |
258 } else { | |
259 delete bdict_file_; | |
260 bdict_file_ = NULL; | |
261 } | |
262 | |
263 Finish(true); | |
264 } | |
265 | |
266 private: | |
267 void Finish(bool file_existed) { | |
268 Task* task = NewRunnableMethod(spellchecker_, &SpellChecker::HunspellInited, | |
269 hunspell_, bdict_file_, file_existed); | |
270 if (spellchecker_->file_loop_) { | |
271 MessageLoop* ui_loop = ChromeThread::GetMessageLoop(ChromeThread::UI); | |
272 // We were called on the file loop. Post back to the IO loop. | |
273 // If this never gets posted to the IO loop, then we will leak |hunspell_| | |
274 // and |bdict_file_|. But that can only happen during shutdown, so it's | |
275 // not worth caring about. | |
276 ui_loop->PostTask(FROM_HERE, new UIProxyForIOTask(task, spellchecker_)); | |
277 } else { | |
278 // We were called directly (e.g., during testing). Run the task directly. | |
279 task->Run(); | |
280 delete task; | |
281 } | |
282 } | |
283 | |
284 // The SpellChecker we are working for. We are guaranteed to be outlived | |
285 // by this object because it AddRefs() itself before calling us. | |
286 // Accessing it is not necessarily thread safe, but we are careful to only | |
287 // access it in ways that are. | |
288 SpellChecker* spellchecker_; | |
289 Hunspell* hunspell_; | |
290 file_util::MemoryMappedFile* bdict_file_; | |
291 | |
292 FilePath custom_dictionary_file_name_; | |
293 FilePath dict_file_name_app_; | |
294 FilePath dict_file_name_usr_; | |
295 | |
296 DISALLOW_COPY_AND_ASSIGN(ReadDictionaryTask); | |
297 }; | |
298 | |
299 void SpellChecker::SpellCheckLanguages(std::vector<std::string>* languages) { | |
300 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages); | |
301 ++i) { | |
302 languages->push_back(g_supported_spellchecker_languages[i].language); | |
303 } | |
304 } | |
305 | |
306 // This function returns the language-region version of language name. | |
307 // e.g. returns hi-IN for hi. | |
308 std::string SpellChecker::GetSpellCheckLanguageRegion( | |
309 std::string input_language) { | |
310 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages); | |
311 ++i) { | |
312 std::string language( | |
313 g_supported_spellchecker_languages[i].language); | |
314 if (language == input_language) | |
315 return std::string( | |
316 g_supported_spellchecker_languages[i].language_region); | |
317 } | |
318 | |
319 return input_language; | |
320 } | |
321 | |
322 | |
323 std::string SpellChecker::GetLanguageFromLanguageRegion( | |
324 std::string input_language) { | |
325 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages); | |
326 ++i) { | |
327 std::string language( | |
328 g_supported_spellchecker_languages[i].language_region); | |
329 if (language == input_language) | |
330 return std::string(g_supported_spellchecker_languages[i].language); | |
331 } | |
332 | |
333 return input_language; | |
334 } | |
335 | |
336 std::string SpellChecker::GetCorrespondingSpellCheckLanguage( | |
337 const std::string& language) { | |
338 // Look for exact match in the Spell Check language list. | |
339 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages); | |
340 ++i) { | |
341 // First look for exact match in the language part of the list. | |
342 std::string spellcheck_language( | |
343 g_supported_spellchecker_languages[i].language); | |
344 if (spellcheck_language == language) | |
345 return language; | |
346 | |
347 // Next, look for exact match in the language_region part of the list. | |
348 std::string spellcheck_language_region( | |
349 g_supported_spellchecker_languages[i].language_region); | |
350 if (spellcheck_language_region == language) | |
351 return g_supported_spellchecker_languages[i].language; | |
352 } | |
353 | |
354 // Look for a match by comparing only language parts. All the 'en-RR' | |
355 // except for 'en-GB' exactly matched in the above loop, will match | |
356 // 'en-US'. This is not ideal because 'en-ZA', 'en-NZ' had | |
357 // better be matched with 'en-GB'. This does not handle cases like | |
358 // 'az-Latn-AZ' vs 'az-Arab-AZ', either, but we don't use 3-part | |
359 // locale ids with a script code in the middle, yet. | |
360 // TODO(jungshik): Add a better fallback. | |
361 std::string language_part(language, 0, language.find('-')); | |
362 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages); | |
363 ++i) { | |
364 std::string spellcheck_language( | |
365 g_supported_spellchecker_languages[i].language_region); | |
366 if (spellcheck_language.substr(0, spellcheck_language.find('-')) == | |
367 language_part) | |
368 return spellcheck_language; | |
369 } | |
370 | |
371 // No match found - return blank. | |
372 return std::string(); | |
373 } | |
374 | |
375 int SpellChecker::GetSpellCheckLanguages( | |
376 Profile* profile, | |
377 std::vector<std::string>* languages) { | |
378 StringPrefMember accept_languages_pref; | |
379 StringPrefMember dictionary_language_pref; | |
380 accept_languages_pref.Init(prefs::kAcceptLanguages, profile->GetPrefs(), | |
381 NULL); | |
382 dictionary_language_pref.Init(prefs::kSpellCheckDictionary, | |
383 profile->GetPrefs(), NULL); | |
384 std::string dictionary_language = | |
385 WideToASCII(dictionary_language_pref.GetValue()); | |
386 | |
387 // The current dictionary language should be there. | |
388 languages->push_back(dictionary_language); | |
389 | |
390 // Now scan through the list of accept languages, and find possible mappings | |
391 // from this list to the existing list of spell check languages. | |
392 std::vector<std::string> accept_languages; | |
393 | |
394 if (SpellCheckerPlatform::SpellCheckerAvailable()) { | |
395 SpellCheckerPlatform::GetAvailableLanguages(&accept_languages); | |
396 } else { | |
397 SplitString(WideToASCII(accept_languages_pref.GetValue()), ',', | |
398 &accept_languages); | |
399 } | |
400 for (std::vector<std::string>::const_iterator i = accept_languages.begin(); | |
401 i != accept_languages.end(); ++i) { | |
402 std::string language = GetCorrespondingSpellCheckLanguage(*i); | |
403 if (!language.empty() && | |
404 std::find(languages->begin(), languages->end(), language) == | |
405 languages->end()) | |
406 languages->push_back(language); | |
407 } | |
408 | |
409 for (size_t i = 0; i < languages->size(); ++i) { | |
410 if ((*languages)[i] == dictionary_language) | |
411 return i; | |
412 } | |
413 return -1; | |
414 } | |
415 | |
416 FilePath SpellChecker::GetVersionedFileName(const std::string& input_language, | |
417 const FilePath& dict_dir) { | |
418 // The default dictionary version is 1-2. These versions have been augmented | |
419 // with additional words found by the translation team. | |
420 static const char kDefaultVersionString[] = "-1-2"; | |
421 | |
422 // The following dictionaries have either not been augmented with additional | |
423 // words (version 1-1) or have new words, as well as an upgraded dictionary | |
424 // as of Feb 2009 (version 1-3). | |
425 static const struct { | |
426 // The language input. | |
427 const char* language; | |
428 | |
429 // The corresponding version. | |
430 const char* version; | |
431 } special_version_string[] = { | |
432 {"en-AU", "-1-1"}, | |
433 {"en-GB", "-1-1"}, | |
434 {"es-ES", "-1-1"}, | |
435 {"nl-NL", "-1-1"}, | |
436 {"ru-RU", "-1-1"}, | |
437 {"sv-SE", "-1-1"}, | |
438 {"he-IL", "-1-1"}, | |
439 {"el-GR", "-1-1"}, | |
440 {"hi-IN", "-1-1"}, | |
441 {"tr-TR", "-1-1"}, | |
442 {"et-EE", "-1-1"}, | |
443 {"fr-FR", "-1-4"}, // to fix crash, fr dictionary was updated to 1.4 | |
444 {"lt-LT", "-1-3"}, | |
445 {"pl-PL", "-1-3"} | |
446 }; | |
447 | |
448 // Generate the bdict file name using default version string or special | |
449 // version string, depending on the language. | |
450 std::string language = GetSpellCheckLanguageRegion(input_language); | |
451 std::string versioned_bdict_file_name(language + kDefaultVersionString + | |
452 ".bdic"); | |
453 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(special_version_string); ++i) { | |
454 if (language == special_version_string[i].language) { | |
455 versioned_bdict_file_name = | |
456 language + special_version_string[i].version + ".bdic"; | |
457 break; | |
458 } | |
459 } | |
460 | |
461 return dict_dir.AppendASCII(versioned_bdict_file_name); | |
462 } | |
463 | |
464 SpellChecker::SpellChecker(const FilePath& dict_dir, | |
465 const std::string& language, | |
466 URLRequestContextGetter* request_context_getter, | |
467 const FilePath& custom_dictionary_file_name) | |
468 : given_dictionary_directory_(dict_dir), | |
469 custom_dictionary_file_name_(custom_dictionary_file_name), | |
470 tried_to_init_(false), | |
471 language_(language), | |
472 worker_loop_(NULL), | |
473 tried_to_download_dictionary_file_(false), | |
474 file_loop_(NULL), | |
475 request_context_getter_(request_context_getter), | |
476 obtaining_dictionary_(false), | |
477 auto_spell_correct_turned_on_(false), | |
478 is_using_platform_spelling_engine_(false), | |
479 fetcher_(NULL), | |
480 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { | |
481 if (SpellCheckerPlatform::SpellCheckerAvailable()) { | |
482 SpellCheckerPlatform::Init(); | |
483 if (SpellCheckerPlatform::PlatformSupportsLanguage(language)) { | |
484 // If we have reached here, then we know that the current platform | |
485 // supports the given language and we will use it instead of hunspell. | |
486 SpellCheckerPlatform::SetLanguage(language); | |
487 is_using_platform_spelling_engine_ = true; | |
488 } | |
489 } | |
490 | |
491 // Get the corresponding BDIC file name. | |
492 bdic_file_name_ = GetVersionedFileName(language, dict_dir).BaseName(); | |
493 | |
494 // Get File Loop - hunspell gets initialized here. | |
495 base::Thread* file_thread = g_browser_process->file_thread(); | |
496 if (file_thread) | |
497 file_loop_ = file_thread->message_loop(); | |
498 | |
499 // Get the path to the custom dictionary file. | |
500 if (custom_dictionary_file_name_.empty()) { | |
501 FilePath personal_file_directory; | |
502 PathService::Get(chrome::DIR_USER_DATA, &personal_file_directory); | |
503 custom_dictionary_file_name_ = | |
504 personal_file_directory.Append(chrome::kCustomDictionaryFileName); | |
505 } | |
506 | |
507 // Use this dictionary language as the default one of the | |
508 // SpellcheckCharAttribute object. | |
509 character_attributes_.SetDefaultLanguage(language); | |
510 } | |
511 | |
512 SpellChecker::~SpellChecker() { | |
513 // This must be deleted on the I/O thread (see the header). This is the same | |
514 // thread that SpellCheckWord is called on, so we verify that they were all | |
515 // the same thread. | |
516 if (worker_loop_) | |
517 DCHECK(MessageLoop::current() == worker_loop_); | |
518 } | |
519 | |
520 void SpellChecker::StartDictionaryDownload(const FilePath& file_name) { | |
521 // Determine URL of file to download. | |
522 static const char kDownloadServerUrl[] = | |
523 "http://cache.pack.google.com/edgedl/chrome/dict/"; | |
524 GURL url = GURL(std::string(kDownloadServerUrl) + WideToUTF8( | |
525 l10n_util::ToLower(bdic_file_name_.ToWStringHack()))); | |
526 fetcher_.reset(new URLFetcher(url, URLFetcher::GET, this)); | |
527 fetcher_->set_request_context(request_context_getter_); | |
528 obtaining_dictionary_ = true; | |
529 fetcher_->Start(); | |
530 } | |
531 | |
532 void SpellChecker::OnURLFetchComplete(const URLFetcher* source, | |
533 const GURL& url, | |
534 const URLRequestStatus& status, | |
535 int response_code, | |
536 const ResponseCookies& cookies, | |
537 const std::string& data) { | |
538 DCHECK(source); | |
539 if ((response_code / 100) != 2) { | |
540 obtaining_dictionary_ = false; | |
541 return; | |
542 } | |
543 | |
544 // Basic sanity check on the dictionary. | |
545 // There's the small chance that we might see a 200 status code for a body | |
546 // that represents some form of failure. | |
547 if (data.size() < 4 || data[0] != 'B' || data[1] != 'D' || data[2] != 'i' || | |
548 data[3] != 'c') { | |
549 obtaining_dictionary_ = false; | |
550 return; | |
551 } | |
552 | |
553 // Save the file in the file thread, and not here, the IO thread. | |
554 FilePath first_attempt_file_name = given_dictionary_directory_.Append( | |
555 bdic_file_name_); | |
556 FilePath user_data_dir = GetFallbackDictionaryDownloadDirectory(); | |
557 FilePath fallback_file_name = user_data_dir.Append(bdic_file_name_); | |
558 Task* dic_task = method_factory_. | |
559 NewRunnableMethod(&SpellChecker::OnDictionarySaveComplete); | |
560 file_loop_->PostTask(FROM_HERE, new SaveDictionaryTask(dic_task, | |
561 first_attempt_file_name, fallback_file_name, data)); | |
562 } | |
563 | |
564 void SpellChecker::OnDictionarySaveComplete() { | |
565 obtaining_dictionary_ = false; | |
566 // Now that the dictionary is downloaded, continue trying to initialize. | |
567 Initialize(); | |
568 } | |
569 | |
570 // Initialize SpellChecker. In this method, if the dictionary is not present | |
571 // in the local disk, it is fetched asynchronously. | |
572 bool SpellChecker::Initialize() { | |
573 if (!worker_loop_) | |
574 worker_loop_ = MessageLoop::current(); | |
575 else | |
576 DCHECK(worker_loop_ == MessageLoop::current()); | |
577 | |
578 // Return false if the dictionary files are downloading. | |
579 if (obtaining_dictionary_) | |
580 return false; | |
581 | |
582 // Return false if tried to init and failed - don't try multiple times in | |
583 // this session. | |
584 if (tried_to_init_) | |
585 return hunspell_.get() != NULL; | |
586 | |
587 StatsScope<StatsCounterTimer> timer(chrome::Counters::spellcheck_init()); | |
588 | |
589 // The default place where the spellcheck dictionary can reside is | |
590 // chrome::DIR_APP_DICTIONARIES. However, for systemwide installations, | |
591 // this directory may not have permissions for download. In that case, the | |
592 // alternate directory for download is chrome::DIR_USER_DATA. We have to check | |
593 // for the spellcheck dictionaries in both the directories. If not found in | |
594 // either one, it has to be downloaded in either of the two. | |
595 // TODO(sidchat): Some sort of UI to warn users that spellchecker is not | |
596 // working at all (due to failed dictionary download)? | |
597 | |
598 // File name for downloading in DIR_APP_DICTIONARIES. | |
599 FilePath dictionary_file_name_app = GetVersionedFileName(language_, | |
600 given_dictionary_directory_); | |
601 | |
602 // Filename for downloading in the fallback dictionary download directory, | |
603 // DIR_USER_DATA. | |
604 FilePath dict_dir_userdata = GetFallbackDictionaryDownloadDirectory(); | |
605 FilePath dictionary_file_name_usr = GetVersionedFileName(language_, | |
606 dict_dir_userdata); | |
607 | |
608 // Balances Release() in HunspellInited(), or in UIProxyForIOTask if the IO | |
609 // thread is torn down before the ReadDictionaryTask calls us back. | |
610 AddRef(); | |
611 Task* task = new ReadDictionaryTask(this, | |
612 dictionary_file_name_app, dictionary_file_name_usr); | |
613 if (file_loop_) { | |
614 file_loop_->PostTask(FROM_HERE, task); | |
615 } else { | |
616 task->Run(); | |
617 delete task; | |
618 } | |
619 | |
620 return hunspell_.get() != NULL; | |
621 } | |
622 | |
623 void SpellChecker::HunspellInited(Hunspell* hunspell, | |
624 file_util::MemoryMappedFile* bdict_file, | |
625 bool file_existed) { | |
626 DCHECK(worker_loop_ == MessageLoop::current()); | |
627 | |
628 if (file_existed) | |
629 tried_to_init_ = true; | |
630 | |
631 if (!hunspell) { | |
632 if (!file_existed) { | |
633 // File didn't exist. We need to download a dictionary. | |
634 DoDictionaryDownload(); | |
635 } | |
636 | |
637 // Balances AddRef() in Initialize(). | |
638 Release(); | |
639 return; | |
640 } | |
641 | |
642 | |
643 bdict_file_.reset(bdict_file); | |
644 hunspell_.reset(hunspell); | |
645 // Add all the custom words we've gotten while Hunspell was loading. | |
646 while (!custom_words_.empty()) { | |
647 hunspell_->add(custom_words_.front().c_str()); | |
648 custom_words_.pop(); | |
649 } | |
650 | |
651 // Balances AddRef() in Initialize(). | |
652 Release(); | |
653 } | |
654 | |
655 void SpellChecker::DoDictionaryDownload() { | |
656 // Download the dictionary file. | |
657 if (file_loop_ && request_context_getter_) { | |
658 if (!tried_to_download_dictionary_file_) { | |
659 FilePath dictionary_file_name_app = GetVersionedFileName(language_, | |
660 given_dictionary_directory_); | |
661 StartDictionaryDownload(dictionary_file_name_app); | |
662 tried_to_download_dictionary_file_ = true; | |
663 } else { | |
664 // Don't try to download a dictionary more than once. | |
665 tried_to_init_ = true; | |
666 } | |
667 } else { | |
668 NOTREACHED(); | |
669 } | |
670 } | |
671 | |
672 string16 SpellChecker::GetAutoCorrectionWord(const string16& word, int tag) { | |
673 string16 autocorrect_word; | 97 string16 autocorrect_word; |
674 if (!auto_spell_correct_turned_on_) | 98 if (!auto_spell_correct_turned_on_) |
675 return autocorrect_word; // Return the empty string. | 99 return autocorrect_word; // Return the empty string. |
676 | 100 |
677 int word_length = static_cast<int>(word.size()); | 101 int word_length = static_cast<int>(word.size()); |
678 if (word_length < 2 || word_length > kMaxAutoCorrectWordSize) | 102 if (word_length < 2 || word_length > kMaxAutoCorrectWordSize) |
679 return autocorrect_word; | 103 return autocorrect_word; |
680 | 104 |
| 105 if (InitializeIfNeeded()) |
| 106 return autocorrect_word; |
| 107 |
681 char16 misspelled_word[kMaxAutoCorrectWordSize + 1]; | 108 char16 misspelled_word[kMaxAutoCorrectWordSize + 1]; |
682 const char16* word_char = word.c_str(); | 109 const char16* word_char = word.c_str(); |
683 for (int i = 0; i <= kMaxAutoCorrectWordSize; i++) { | 110 for (int i = 0; i <= kMaxAutoCorrectWordSize; i++) { |
684 if (i >= word_length) | 111 if (i >= word_length) |
685 misspelled_word[i] = NULL; | 112 misspelled_word[i] = NULL; |
686 else | 113 else |
687 misspelled_word[i] = word_char[i]; | 114 misspelled_word[i] = word_char[i]; |
688 } | 115 } |
689 | 116 |
690 // Swap adjacent characters and spellcheck. | 117 // Swap adjacent characters and spellcheck. |
(...skipping 17 matching lines...) Expand all Loading... |
708 break; | 135 break; |
709 } | 136 } |
710 } | 137 } |
711 | 138 |
712 // Restore the swapped characters. | 139 // Restore the swapped characters. |
713 std::swap(misspelled_word[i], misspelled_word[i + 1]); | 140 std::swap(misspelled_word[i], misspelled_word[i + 1]); |
714 } | 141 } |
715 return autocorrect_word; | 142 return autocorrect_word; |
716 } | 143 } |
717 | 144 |
718 void SpellChecker::EnableAutoSpellCorrect(bool turn_on) { | 145 void SpellCheck::EnableAutoSpellCorrect(bool turn_on) { |
719 auto_spell_correct_turned_on_ = turn_on; | 146 auto_spell_correct_turned_on_ = turn_on; |
720 } | 147 } |
721 | 148 |
722 // Returns whether or not the given string is a valid contraction. | 149 void SpellCheck::WordAdded(const std::string& word) { |
723 // This function is a fall-back when the SpellcheckWordIterator class | 150 if (is_using_platform_spelling_engine_) |
724 // returns a concatenated word which is not in the selected dictionary | 151 return; |
725 // (e.g. "in'n'out") but each word is valid. | |
726 bool SpellChecker::IsValidContraction(const string16& contraction, int tag) { | |
727 SpellcheckWordIterator word_iterator; | |
728 word_iterator.Initialize(&character_attributes_, contraction.c_str(), | |
729 contraction.length(), false); | |
730 | 152 |
731 string16 word; | 153 if (!hunspell_.get()) { |
732 int word_start; | 154 // Save it for later---add it when hunspell is initialized. |
733 int word_length; | 155 custom_words_.push_back(word); |
734 while (word_iterator.GetNextWord(&word, &word_start, &word_length)) { | |
735 if (!CheckSpelling(word, tag)) | |
736 return false; | |
737 } | |
738 return true; | |
739 } | |
740 | |
741 bool SpellChecker::SpellCheckWord( | |
742 const char16* in_word, | |
743 int in_word_len, | |
744 int tag, | |
745 int* misspelling_start, | |
746 int* misspelling_len, | |
747 std::vector<string16>* optional_suggestions) { | |
748 DCHECK(in_word_len >= 0); | |
749 DCHECK(misspelling_start && misspelling_len) << "Out vars must be given."; | |
750 | |
751 // This must always be called on the same thread (normally the I/O thread). | |
752 if (worker_loop_) | |
753 DCHECK(MessageLoop::current() == worker_loop_); | |
754 | |
755 // Check if the platform spellchecker is being used. | |
756 if (!is_using_platform_spelling_engine_) { | |
757 // If it isn't, try and init hunspell. | |
758 Initialize(); | |
759 | |
760 // Check to see if hunspell was successfully initialized. | |
761 if (!hunspell_.get()) | |
762 return true; // Unable to spellcheck, return word is OK. | |
763 } | |
764 | |
765 StatsScope<StatsRate> timer(chrome::Counters::spellcheck_lookup()); | |
766 | |
767 *misspelling_start = 0; | |
768 *misspelling_len = 0; | |
769 if (in_word_len == 0) | |
770 return true; // No input means always spelled correctly. | |
771 | |
772 SpellcheckWordIterator word_iterator; | |
773 string16 word; | |
774 int word_start; | |
775 int word_length; | |
776 word_iterator.Initialize(&character_attributes_, in_word, in_word_len, true); | |
777 while (word_iterator.GetNextWord(&word, &word_start, &word_length)) { | |
778 // Found a word (or a contraction) that the spellchecker can check the | |
779 // spelling of. | |
780 bool word_ok = CheckSpelling(word, tag); | |
781 if (word_ok) | |
782 continue; | |
783 | |
784 // If the given word is a concatenated word of two or more valid words | |
785 // (e.g. "hello:hello"), we should treat it as a valid word. | |
786 if (IsValidContraction(word, tag)) | |
787 continue; | |
788 | |
789 *misspelling_start = word_start; | |
790 *misspelling_len = word_length; | |
791 | |
792 // Get the list of suggested words. | |
793 if (optional_suggestions) | |
794 FillSuggestionList(word, optional_suggestions); | |
795 return false; | |
796 } | |
797 | |
798 return true; | |
799 } | |
800 | |
801 // This task is called in the file loop to write the new word to the custom | |
802 // dictionary on disk. | |
803 class AddWordToCustomDictionaryTask : public Task { | |
804 public: | |
805 AddWordToCustomDictionaryTask(const FilePath& file_name, | |
806 const string16& word) | |
807 : file_name_(file_name), | |
808 word_(UTF16ToUTF8(word)) { | |
809 } | |
810 | |
811 private: | |
812 void Run(); | |
813 | |
814 FilePath file_name_; | |
815 std::string word_; | |
816 }; | |
817 | |
818 void AddWordToCustomDictionaryTask::Run() { | |
819 // Add the word with a new line. Note that, although this would mean an | |
820 // extra line after the list of words, this is potentially harmless and | |
821 // faster, compared to verifying every time whether to append a new line | |
822 // or not. | |
823 word_ += "\n"; | |
824 FILE* f = file_util::OpenFile(file_name_, "a+"); | |
825 if (f != NULL) | |
826 fputs(word_.c_str(), f); | |
827 file_util::CloseFile(f); | |
828 } | |
829 | |
830 void SpellChecker::AddWord(const string16& word) { | |
831 if (is_using_platform_spelling_engine_) { | |
832 SpellCheckerPlatform::AddWord(word); | |
833 return; | |
834 } | |
835 | |
836 // Check if the |hunspell_| has been initialized at all. | |
837 Initialize(); | |
838 | |
839 // Add the word to hunspell. | |
840 std::string word_to_add = UTF16ToUTF8(word); | |
841 // Don't attempt to add an empty word, or one larger than Hunspell can handle | |
842 if (!word_to_add.empty() && word_to_add.length() < MAXWORDUTF8LEN) { | |
843 // Either add the word to |hunspell_|, or, if |hunspell_| is still loading, | |
844 // defer it till after the load completes. | |
845 if (hunspell_.get()) | |
846 hunspell_->add(word_to_add.c_str()); | |
847 else | |
848 custom_words_.push(word_to_add); | |
849 } | |
850 | |
851 // Now add the word to the custom dictionary file. | |
852 Task* write_word_task = | |
853 new AddWordToCustomDictionaryTask(custom_dictionary_file_name_, word); | |
854 if (file_loop_) { | |
855 file_loop_->PostTask(FROM_HERE, write_word_task); | |
856 } else { | 156 } else { |
857 write_word_task->Run(); | 157 AddWordToHunspell(word); |
858 delete write_word_task; | |
859 } | 158 } |
860 } | 159 } |
861 | 160 |
862 bool SpellChecker::CheckSpelling(const string16& word_to_check, int tag) { | 161 void SpellCheck::InitializeHunspell() { |
| 162 if (hunspell_.get()) |
| 163 return; |
| 164 |
| 165 bdict_file_.reset(new file_util::MemoryMappedFile); |
| 166 |
| 167 if (bdict_file_->Initialize(fd_)) { |
| 168 TimeTicks start_time = TimeTicks::Now(); |
| 169 |
| 170 hunspell_.reset( |
| 171 new Hunspell(bdict_file_->data(), bdict_file_->length())); |
| 172 |
| 173 // Add custom words to Hunspell. |
| 174 for (std::vector<std::string>::iterator it = custom_words_.begin(); |
| 175 it != custom_words_.end(); ++it) { |
| 176 AddWordToHunspell(*it); |
| 177 } |
| 178 |
| 179 DHISTOGRAM_TIMES("Spellcheck.InitTime", |
| 180 TimeTicks::Now() - start_time); |
| 181 } |
| 182 } |
| 183 |
| 184 void SpellCheck::AddWordToHunspell(const std::string& word) { |
| 185 if (!word.empty() && word.length() < MAXWORDUTF8LEN) |
| 186 hunspell_->add(word.c_str()); |
| 187 } |
| 188 |
| 189 bool SpellCheck::InitializeIfNeeded() { |
| 190 if (!initialized_) { |
| 191 RenderThread::current()->RequestSpellCheckDictionary(); |
| 192 initialized_ = true; |
| 193 return true; |
| 194 } |
| 195 |
| 196 // Check if the platform spellchecker is being used. |
| 197 if (!is_using_platform_spelling_engine_ && fd_.fd != -1) { |
| 198 // If it isn't, init hunspell. |
| 199 InitializeHunspell(); |
| 200 } |
| 201 |
| 202 return false; |
| 203 } |
| 204 |
| 205 // When called, relays the request to check the spelling to the proper |
| 206 // backend, either hunspell or a platform-specific backend. |
| 207 bool SpellCheck::CheckSpelling(const string16& word_to_check, int tag) { |
863 bool word_correct = false; | 208 bool word_correct = false; |
864 | 209 |
865 TimeTicks begin_time = TimeTicks::Now(); | |
866 if (is_using_platform_spelling_engine_) { | 210 if (is_using_platform_spelling_engine_) { |
867 word_correct = SpellCheckerPlatform::CheckSpelling(word_to_check, tag); | 211 // TODO(estade): sync IPC to browser. |
| 212 word_correct = true; |
868 } else { | 213 } else { |
869 std::string word_to_check_utf8(UTF16ToUTF8(word_to_check)); | 214 std::string word_to_check_utf8(UTF16ToUTF8(word_to_check)); |
870 // Hunspell shouldn't let us exceed its max, but check just in case | 215 // Hunspell shouldn't let us exceed its max, but check just in case |
871 if (word_to_check_utf8.length() < MAXWORDUTF8LEN) { | 216 if (word_to_check_utf8.length() < MAXWORDUTF8LEN) { |
872 // |hunspell_->spell| returns non-zero if the word is spelled correctly | 217 // |hunspell_->spell| returns non-zero if the word is spelled correctly |
873 // and 0 otherwise. | 218 // and 0 otherwise. |
874 word_correct = (hunspell_->spell(word_to_check_utf8.c_str()) != 0); | 219 word_correct = (hunspell_->spell(word_to_check_utf8.c_str()) != 0); |
875 } | 220 } |
876 } | 221 } |
877 DHISTOGRAM_TIMES("Spellcheck.CheckTime", TimeTicks::Now() - begin_time); | |
878 | 222 |
879 return word_correct; | 223 return word_correct; |
880 } | 224 } |
881 | 225 |
882 void SpellChecker::FillSuggestionList( | 226 void SpellCheck::FillSuggestionList( |
883 const string16& wrong_word, | 227 const string16& wrong_word, |
884 std::vector<string16>* optional_suggestions) { | 228 std::vector<string16>* optional_suggestions) { |
885 if (is_using_platform_spelling_engine_) { | 229 if (is_using_platform_spelling_engine_) { |
886 SpellCheckerPlatform::FillSuggestionList(wrong_word, optional_suggestions); | 230 // TODO(estade): sync IPC to browser. |
887 return; | 231 return; |
888 } | 232 } |
889 char** suggestions; | 233 char** suggestions; |
890 TimeTicks begin_time = TimeTicks::Now(); | 234 int number_of_suggestions = |
891 int number_of_suggestions = hunspell_->suggest(&suggestions, | 235 hunspell_->suggest(&suggestions, UTF16ToUTF8(wrong_word).c_str()); |
892 UTF16ToUTF8(wrong_word).c_str()); | |
893 DHISTOGRAM_TIMES("Spellcheck.SuggestTime", | |
894 TimeTicks::Now() - begin_time); | |
895 | 236 |
896 // Populate the vector of WideStrings. | 237 // Populate the vector of WideStrings. |
897 for (int i = 0; i < number_of_suggestions; i++) { | 238 for (int i = 0; i < number_of_suggestions; i++) { |
898 if (i < kMaxSuggestions) | 239 if (i < kMaxSuggestions) |
899 optional_suggestions->push_back(UTF8ToUTF16(suggestions[i])); | 240 optional_suggestions->push_back(UTF8ToUTF16(suggestions[i])); |
900 free(suggestions[i]); | 241 free(suggestions[i]); |
901 } | 242 } |
902 if (suggestions != NULL) | 243 if (suggestions != NULL) |
903 free(suggestions); | 244 free(suggestions); |
904 } | 245 } |
| 246 |
| 247 // Returns whether or not the given string is a valid contraction. |
| 248 // This function is a fall-back when the SpellcheckWordIterator class |
| 249 // returns a concatenated word which is not in the selected dictionary |
| 250 // (e.g. "in'n'out") but each word is valid. |
| 251 bool SpellCheck::IsValidContraction(const string16& contraction, int tag) { |
| 252 SpellcheckWordIterator word_iterator; |
| 253 word_iterator.Initialize(&character_attributes_, contraction.c_str(), |
| 254 contraction.length(), false); |
| 255 |
| 256 string16 word; |
| 257 int word_start; |
| 258 int word_length; |
| 259 while (word_iterator.GetNextWord(&word, &word_start, &word_length)) { |
| 260 if (!CheckSpelling(word, tag)) |
| 261 return false; |
| 262 } |
| 263 return true; |
| 264 } |
OLD | NEW |