OLD | NEW |
| (Empty) |
1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/spellchecker.h" | |
6 | |
7 #include "app/l10n_util.h" | |
8 #include "base/basictypes.h" | |
9 #include "base/compiler_specific.h" | |
10 #include "base/file_util.h" | |
11 #include "base/histogram.h" | |
12 #include "base/logging.h" | |
13 #include "base/path_service.h" | |
14 #include "base/stats_counters.h" | |
15 #include "base/string_util.h" | |
16 #include "chrome/browser/net/url_fetcher.h" | |
17 #include "chrome/browser/profile.h" | |
18 #include "chrome/browser/spellchecker_common.h" | |
19 #include "chrome/browser/spellchecker_platform_engine.h" | |
20 #include "chrome/common/chrome_constants.h" | |
21 #include "chrome/common/chrome_counters.h" | |
22 #include "chrome/common/chrome_paths.h" | |
23 #include "chrome/common/pref_names.h" | |
24 #include "chrome/common/pref_service.h" | |
25 #include "grit/generated_resources.h" | |
26 #include "grit/locale_settings.h" | |
27 #include "net/url_request/url_request.h" | |
28 #include "third_party/hunspell/src/hunspell/hunspell.hxx" | |
29 | |
30 using base::TimeTicks; | |
31 | |
32 namespace { | |
33 | |
// One row per spellcheck language: the language code and the language-region
// code used to name the matching dictionary.
struct SupportedSpellCheckerLanguage {
  // The language.
  const char* language;

  // The corresponding language and region, used by the dictionaries.
  const char* language_region;
};

// Table of every language the spellchecker supports. Entries that are
// commented out are intentionally excluded (see the bug noted on each).
const SupportedSpellCheckerLanguage g_supported_spellchecker_languages[] = {
  {"en-US", "en-US"},
  {"en-GB", "en-GB"},
  {"en-AU", "en-AU"},
  {"fr", "fr-FR"},
  {"it", "it-IT"},
  {"de", "de-DE"},
  {"es", "es-ES"},
  {"nl", "nl-NL"},
  {"pt-BR", "pt-BR"},
  {"ru", "ru-RU"},
  {"pl", "pl-PL"},
  // {"th", "th-TH"}, // Not to be included in Spellchecker as per B=1277824
  {"sv", "sv-SE"},
  {"da", "da-DK"},
  {"pt-PT", "pt-PT"},
  {"ro", "ro-RO"},
  // {"hu", "hu-HU"}, // Not to be included in Spellchecker as per B=1277824
  {"he", "he-IL"},
  {"id", "id-ID"},
  {"cs", "cs-CZ"},
  {"el", "el-GR"},
  {"nb", "nb-NO"},
  {"vi", "vi-VN"},
  // {"bg", "bg-BG"}, // Not to be included in Spellchecker as per B=1277824
  {"hr", "hr-HR"},
  {"lt", "lt-LT"},
  {"sk", "sk-SK"},
  {"sl", "sl-SI"},
  {"ca", "ca-ES"},
  {"lv", "lv-LV"},
  // {"uk", "uk-UA"}, // Not to be included in Spellchecker as per B=1277824
  {"hi", "hi-IN"},
  {"et", "et-EE"},
  {"tr", "tr-TR"},
};
76 | |
77 // Get the fallback folder (currently chrome::DIR_USER_DATA) where the | |
78 // dictionary is downloaded in case of system-wide installations. | |
79 FilePath GetFallbackDictionaryDownloadDirectory() { | |
80 FilePath dict_dir_userdata; | |
81 PathService::Get(chrome::DIR_USER_DATA, &dict_dir_userdata); | |
82 dict_dir_userdata = dict_dir_userdata.AppendASCII("Dictionaries"); | |
83 return dict_dir_userdata; | |
84 } | |
85 | |
86 bool SaveBufferToFile(const std::string& data, | |
87 FilePath file_to_write) { | |
88 int num_bytes = data.length(); | |
89 return file_util::WriteFile(file_to_write, data.data(), num_bytes) == | |
90 num_bytes; | |
91 } | |
92 | |
93 } // namespace | |
94 | |
95 // Design: The spellchecker initializes hunspell_ in the Initialize() method. | |
96 // This is done using the dictionary file on disk, e.g. "en-US_1_1.bdic". | |
97 // Initialization of hunspell_ is held off during this process. If the | |
98 // dictionary is not available, we first attempt to download and save it. After | |
99 // the dictionary is downloaded and saved to disk (or the attempt to do so | |
100 // fails)), corresponding flags are set in spellchecker - in the IO thread. | |
101 // After the flags are cleared, a (final) attempt is made to initialize | |
102 // hunspell_. If it fails even then (dictionary could not download), no more | |
103 // attempts are made to initialize it. | |
104 class SaveDictionaryTask : public Task { | |
105 public: | |
106 SaveDictionaryTask(Task* on_dictionary_save_complete_callback_task, | |
107 const FilePath& first_attempt_file_name, | |
108 const FilePath& fallback_file_name, | |
109 const std::string& data) | |
110 : on_dictionary_save_complete_callback_task_( | |
111 on_dictionary_save_complete_callback_task), | |
112 first_attempt_file_name_(first_attempt_file_name), | |
113 fallback_file_name_(fallback_file_name), | |
114 data_(data) { | |
115 } | |
116 | |
117 private: | |
118 void Run(); | |
119 | |
120 bool SaveBufferToFile(const std::string& data, | |
121 FilePath file_to_write) { | |
122 int num_bytes = data.length(); | |
123 return file_util::WriteFile(file_to_write, data.data(), num_bytes) == | |
124 num_bytes; | |
125 } | |
126 | |
127 // factory object to invokelater back to spellchecker in io thread on | |
128 // download completion to change appropriate flags. | |
129 Task* on_dictionary_save_complete_callback_task_; | |
130 | |
131 // The file which will be stored in the first attempt. | |
132 FilePath first_attempt_file_name_; | |
133 | |
134 // The file which will be stored as a fallback. | |
135 FilePath fallback_file_name_; | |
136 | |
137 // The buffer which has to be stored to disk. | |
138 std::string data_; | |
139 | |
140 // This invokes back to io loop when downloading is over. | |
141 DISALLOW_COPY_AND_ASSIGN(SaveDictionaryTask); | |
142 }; | |
143 | |
144 void SaveDictionaryTask::Run() { | |
145 if (!SaveBufferToFile(data_, first_attempt_file_name_)) { | |
146 // Try saving it to |fallback_file_name_|, which almost surely has | |
147 // write permission. If even this fails, there is nothing to be done. | |
148 FilePath fallback_dir = fallback_file_name_.DirName(); | |
149 // Create the directory if it does not exist. | |
150 if (!file_util::PathExists(fallback_dir)) | |
151 file_util::CreateDirectory(fallback_dir); | |
152 SaveBufferToFile(data_, fallback_file_name_); | |
153 } // Unsuccessful save is taken care of in SpellChecker::Initialize(). | |
154 | |
155 // Set Flag that dictionary is not downloading anymore. | |
156 ChromeThread::PostTask( | |
157 ChromeThread::IO, FROM_HERE, on_dictionary_save_complete_callback_task_); | |
158 } | |
159 | |
160 // Design: this task tries to read the dictionary from disk and load it into | |
161 // memory. It is executed on the file thread, and posts the results back to | |
162 // the IO thread. | |
163 // The task first checks for the existence of the dictionary in one of the two | |
164 // given locations. If it does not exist, the task informs the SpellChecker, | |
165 // which will try to download the directory and run a new ReadDictionaryTask. | |
166 class ReadDictionaryTask : public Task { | |
167 public: | |
168 ReadDictionaryTask(SpellChecker* spellchecker, | |
169 const FilePath& dict_file_name_app, | |
170 const FilePath& dict_file_name_usr) | |
171 : spellchecker_(spellchecker), | |
172 hunspell_(NULL), | |
173 bdict_file_(NULL), | |
174 custom_dictionary_file_name_( | |
175 spellchecker->custom_dictionary_file_name_), | |
176 dict_file_name_app_(dict_file_name_app), | |
177 dict_file_name_usr_(dict_file_name_usr) { | |
178 } | |
179 | |
180 virtual void Run() { | |
181 FilePath bdict_file_path; | |
182 if (file_util::PathExists(dict_file_name_app_)) { | |
183 bdict_file_path = dict_file_name_app_; | |
184 } else if (file_util::PathExists(dict_file_name_usr_)) { | |
185 bdict_file_path = dict_file_name_usr_; | |
186 } else { | |
187 Finish(false); | |
188 return; | |
189 } | |
190 | |
191 bdict_file_ = new file_util::MemoryMappedFile; | |
192 if (bdict_file_->Initialize(bdict_file_path)) { | |
193 TimeTicks start_time = TimeTicks::Now(); | |
194 | |
195 hunspell_ = | |
196 new Hunspell(bdict_file_->data(), bdict_file_->length()); | |
197 | |
198 // Add custom words to Hunspell. | |
199 std::string contents; | |
200 file_util::ReadFileToString(custom_dictionary_file_name_, &contents); | |
201 std::vector<std::string> list_of_words; | |
202 SplitString(contents, '\n', &list_of_words); | |
203 for (std::vector<std::string>::iterator it = list_of_words.begin(); | |
204 it != list_of_words.end(); ++it) { | |
205 hunspell_->add(it->c_str()); | |
206 } | |
207 | |
208 DHISTOGRAM_TIMES("Spellcheck.InitTime", | |
209 TimeTicks::Now() - start_time); | |
210 } else { | |
211 delete bdict_file_; | |
212 bdict_file_ = NULL; | |
213 } | |
214 | |
215 Finish(true); | |
216 } | |
217 | |
218 private: | |
219 void Finish(bool file_existed) { | |
220 ChromeThread::PostTask( | |
221 ChromeThread::IO, FROM_HERE, | |
222 NewRunnableMethod( | |
223 spellchecker_.get(), &SpellChecker::HunspellInited, hunspell_, | |
224 bdict_file_, file_existed)); | |
225 } | |
226 | |
227 // The SpellChecker we are working for. | |
228 scoped_refptr<SpellChecker> spellchecker_; | |
229 Hunspell* hunspell_; | |
230 file_util::MemoryMappedFile* bdict_file_; | |
231 | |
232 FilePath custom_dictionary_file_name_; | |
233 FilePath dict_file_name_app_; | |
234 FilePath dict_file_name_usr_; | |
235 | |
236 DISALLOW_COPY_AND_ASSIGN(ReadDictionaryTask); | |
237 }; | |
238 | |
239 void SpellChecker::SpellCheckLanguages(std::vector<std::string>* languages) { | |
240 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages); | |
241 ++i) { | |
242 languages->push_back(g_supported_spellchecker_languages[i].language); | |
243 } | |
244 } | |
245 | |
246 // This function returns the language-region version of language name. | |
247 // e.g. returns hi-IN for hi. | |
248 std::string SpellChecker::GetSpellCheckLanguageRegion( | |
249 std::string input_language) { | |
250 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages); | |
251 ++i) { | |
252 std::string language( | |
253 g_supported_spellchecker_languages[i].language); | |
254 if (language == input_language) | |
255 return std::string( | |
256 g_supported_spellchecker_languages[i].language_region); | |
257 } | |
258 | |
259 return input_language; | |
260 } | |
261 | |
262 | |
263 std::string SpellChecker::GetLanguageFromLanguageRegion( | |
264 std::string input_language) { | |
265 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages); | |
266 ++i) { | |
267 std::string language( | |
268 g_supported_spellchecker_languages[i].language_region); | |
269 if (language == input_language) | |
270 return std::string(g_supported_spellchecker_languages[i].language); | |
271 } | |
272 | |
273 return input_language; | |
274 } | |
275 | |
276 std::string SpellChecker::GetCorrespondingSpellCheckLanguage( | |
277 const std::string& language) { | |
278 // Look for exact match in the Spell Check language list. | |
279 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages); | |
280 ++i) { | |
281 // First look for exact match in the language region of the list. | |
282 std::string spellcheck_language( | |
283 g_supported_spellchecker_languages[i].language); | |
284 if (spellcheck_language == language) | |
285 return language; | |
286 | |
287 // Next, look for exact match in the language_region part of the list. | |
288 std::string spellcheck_language_region( | |
289 g_supported_spellchecker_languages[i].language_region); | |
290 if (spellcheck_language_region == language) | |
291 return g_supported_spellchecker_languages[i].language; | |
292 } | |
293 | |
294 // Look for a match by comparing only language parts. All the 'en-RR' | |
295 // except for 'en-GB' exactly matched in the above loop, will match | |
296 // 'en-US'. This is not ideal because 'en-ZA', 'en-NZ' had | |
297 // better be matched with 'en-GB'. This does not handle cases like | |
298 // 'az-Latn-AZ' vs 'az-Arab-AZ', either, but we don't use 3-part | |
299 // locale ids with a script code in the middle, yet. | |
300 // TODO(jungshik): Add a better fallback. | |
301 std::string language_part(language, 0, language.find('-')); | |
302 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages); | |
303 ++i) { | |
304 std::string spellcheck_language( | |
305 g_supported_spellchecker_languages[i].language_region); | |
306 if (spellcheck_language.substr(0, spellcheck_language.find('-')) == | |
307 language_part) | |
308 return spellcheck_language; | |
309 } | |
310 | |
311 // No match found - return blank. | |
312 return std::string(); | |
313 } | |
314 | |
315 // static | |
316 int SpellChecker::GetSpellCheckLanguages( | |
317 Profile* profile, | |
318 std::vector<std::string>* languages) { | |
319 StringPrefMember accept_languages_pref; | |
320 StringPrefMember dictionary_language_pref; | |
321 accept_languages_pref.Init(prefs::kAcceptLanguages, profile->GetPrefs(), | |
322 NULL); | |
323 dictionary_language_pref.Init(prefs::kSpellCheckDictionary, | |
324 profile->GetPrefs(), NULL); | |
325 std::string dictionary_language = | |
326 WideToASCII(dictionary_language_pref.GetValue()); | |
327 | |
328 // The current dictionary language should be there. | |
329 languages->push_back(dictionary_language); | |
330 | |
331 // Now scan through the list of accept languages, and find possible mappings | |
332 // from this list to the existing list of spell check languages. | |
333 std::vector<std::string> accept_languages; | |
334 | |
335 if (SpellCheckerPlatform::SpellCheckerAvailable()) { | |
336 SpellCheckerPlatform::GetAvailableLanguages(&accept_languages); | |
337 } else { | |
338 SplitString(WideToASCII(accept_languages_pref.GetValue()), ',', | |
339 &accept_languages); | |
340 } | |
341 for (std::vector<std::string>::const_iterator i = accept_languages.begin(); | |
342 i != accept_languages.end(); ++i) { | |
343 std::string language = GetCorrespondingSpellCheckLanguage(*i); | |
344 if (!language.empty() && | |
345 std::find(languages->begin(), languages->end(), language) == | |
346 languages->end()) | |
347 languages->push_back(language); | |
348 } | |
349 | |
350 for (size_t i = 0; i < languages->size(); ++i) { | |
351 if ((*languages)[i] == dictionary_language) | |
352 return i; | |
353 } | |
354 return -1; | |
355 } | |
356 | |
357 FilePath SpellChecker::GetVersionedFileName(const std::string& input_language, | |
358 const FilePath& dict_dir) { | |
359 // The default dictionary version is 1-2. These versions have been augmented | |
360 // with additional words found by the translation team. | |
361 static const char kDefaultVersionString[] = "-1-2"; | |
362 | |
363 // The following dictionaries have either not been augmented with additional | |
364 // words (version 1-1) or have new words, as well as an upgraded dictionary | |
365 // as of Feb 2009 (version 1-3). | |
366 static const struct { | |
367 // The language input. | |
368 const char* language; | |
369 | |
370 // The corresponding version. | |
371 const char* version; | |
372 } special_version_string[] = { | |
373 {"en-AU", "-1-1"}, | |
374 {"en-GB", "-1-1"}, | |
375 {"es-ES", "-1-1"}, | |
376 {"nl-NL", "-1-1"}, | |
377 {"ru-RU", "-1-1"}, | |
378 {"sv-SE", "-1-1"}, | |
379 {"he-IL", "-1-1"}, | |
380 {"el-GR", "-1-1"}, | |
381 {"hi-IN", "-1-1"}, | |
382 {"tr-TR", "-1-1"}, | |
383 {"et-EE", "-1-1"}, | |
384 {"fr-FR", "-1-4"}, // to fix crash, fr dictionary was updated to 1.4 | |
385 {"lt-LT", "-1-3"}, | |
386 {"pl-PL", "-1-3"} | |
387 }; | |
388 | |
389 // Generate the bdict file name using default version string or special | |
390 // version string, depending on the language. | |
391 std::string language = GetSpellCheckLanguageRegion(input_language); | |
392 std::string versioned_bdict_file_name(language + kDefaultVersionString + | |
393 ".bdic"); | |
394 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(special_version_string); ++i) { | |
395 if (language == special_version_string[i].language) { | |
396 versioned_bdict_file_name = | |
397 language + special_version_string[i].version + ".bdic"; | |
398 break; | |
399 } | |
400 } | |
401 | |
402 return dict_dir.AppendASCII(versioned_bdict_file_name); | |
403 } | |
404 | |
405 SpellChecker::SpellChecker(const FilePath& dict_dir, | |
406 const std::string& language, | |
407 URLRequestContextGetter* request_context_getter, | |
408 const FilePath& custom_dictionary_file_name) | |
409 : given_dictionary_directory_(dict_dir), | |
410 custom_dictionary_file_name_(custom_dictionary_file_name), | |
411 tried_to_init_(false), | |
412 language_(language), | |
413 tried_to_download_dictionary_file_(false), | |
414 request_context_getter_(request_context_getter), | |
415 obtaining_dictionary_(false), | |
416 auto_spell_correct_turned_on_(false), | |
417 is_using_platform_spelling_engine_(false), | |
418 fetcher_(NULL), | |
419 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { | |
420 if (SpellCheckerPlatform::SpellCheckerAvailable()) { | |
421 SpellCheckerPlatform::Init(); | |
422 if (SpellCheckerPlatform::PlatformSupportsLanguage(language)) { | |
423 // If we have reached here, then we know that the current platform | |
424 // supports the given language and we will use it instead of hunspell. | |
425 SpellCheckerPlatform::SetLanguage(language); | |
426 is_using_platform_spelling_engine_ = true; | |
427 } | |
428 } | |
429 | |
430 // Get the corresponding BDIC file name. | |
431 bdic_file_name_ = GetVersionedFileName(language, dict_dir).BaseName(); | |
432 | |
433 // Get the path to the custom dictionary file. | |
434 if (custom_dictionary_file_name_.empty()) { | |
435 FilePath personal_file_directory; | |
436 PathService::Get(chrome::DIR_USER_DATA, &personal_file_directory); | |
437 custom_dictionary_file_name_ = | |
438 personal_file_directory.Append(chrome::kCustomDictionaryFileName); | |
439 } | |
440 | |
441 // Use this dictionary language as the default one of the | |
442 // SpellcheckCharAttribute object. | |
443 character_attributes_.SetDefaultLanguage(language); | |
444 } | |
445 | |
446 SpellChecker::~SpellChecker() { | |
447 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); | |
448 } | |
449 | |
450 void SpellChecker::StartDictionaryDownload(const FilePath& file_name) { | |
451 // Determine URL of file to download. | |
452 static const char kDownloadServerUrl[] = | |
453 "http://cache.pack.google.com/edgedl/chrome/dict/"; | |
454 GURL url = GURL(std::string(kDownloadServerUrl) + WideToUTF8( | |
455 l10n_util::ToLower(bdic_file_name_.ToWStringHack()))); | |
456 fetcher_.reset(new URLFetcher(url, URLFetcher::GET, this)); | |
457 fetcher_->set_request_context(request_context_getter_); | |
458 obtaining_dictionary_ = true; | |
459 fetcher_->Start(); | |
460 } | |
461 | |
462 void SpellChecker::OnURLFetchComplete(const URLFetcher* source, | |
463 const GURL& url, | |
464 const URLRequestStatus& status, | |
465 int response_code, | |
466 const ResponseCookies& cookies, | |
467 const std::string& data) { | |
468 DCHECK(source); | |
469 if ((response_code / 100) != 2) { | |
470 obtaining_dictionary_ = false; | |
471 return; | |
472 } | |
473 | |
474 // Basic sanity check on the dictionary. | |
475 // There's the small chance that we might see a 200 status code for a body | |
476 // that represents some form of failure. | |
477 if (data.size() < 4 || data[0] != 'B' || data[1] != 'D' || data[2] != 'i' || | |
478 data[3] != 'c') { | |
479 obtaining_dictionary_ = false; | |
480 return; | |
481 } | |
482 | |
483 // Save the file in the file thread, and not here, the IO thread. | |
484 FilePath first_attempt_file_name = given_dictionary_directory_.Append( | |
485 bdic_file_name_); | |
486 FilePath user_data_dir = GetFallbackDictionaryDownloadDirectory(); | |
487 FilePath fallback_file_name = user_data_dir.Append(bdic_file_name_); | |
488 Task* dic_task = method_factory_. | |
489 NewRunnableMethod(&SpellChecker::OnDictionarySaveComplete); | |
490 ChromeThread::PostTask( | |
491 ChromeThread::FILE, FROM_HERE, | |
492 new SaveDictionaryTask( | |
493 dic_task, first_attempt_file_name, fallback_file_name, data)); | |
494 } | |
495 | |
496 void SpellChecker::OnDictionarySaveComplete() { | |
497 obtaining_dictionary_ = false; | |
498 // Now that the dictionary is downloaded, continue trying to download. | |
499 Initialize(); | |
500 } | |
501 | |
502 // Initialize SpellChecker. In this method, if the dictionary is not present | |
503 // in the local disk, it is fetched asynchronously. | |
504 bool SpellChecker::Initialize() { | |
505 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); | |
506 | |
507 // Return false if the dictionary files are downloading. | |
508 if (obtaining_dictionary_) | |
509 return false; | |
510 | |
511 // Return false if tried to init and failed - don't try multiple times in | |
512 // this session. | |
513 if (tried_to_init_) | |
514 return hunspell_.get() != NULL; | |
515 | |
516 StatsScope<StatsCounterTimer> timer(chrome::Counters::spellcheck_init()); | |
517 | |
518 // The default place whether the spellcheck dictionary can reside is | |
519 // chrome::DIR_APP_DICTIONARIES. However, for systemwide installations, | |
520 // this directory may not have permissions for download. In that case, the | |
521 // alternate directory for download is chrome::DIR_USER_DATA. We have to check | |
522 // for the spellcheck dictionaries in both the directories. If not found in | |
523 // either one, it has to be downloaded in either of the two. | |
524 // TODO(sidchat): Some sort of UI to warn users that spellchecker is not | |
525 // working at all (due to failed dictionary download)? | |
526 | |
527 // File name for downloading in DIR_APP_DICTIONARIES. | |
528 FilePath dictionary_file_name_app = GetVersionedFileName(language_, | |
529 given_dictionary_directory_); | |
530 | |
531 // Filename for downloading in the fallback dictionary download directory, | |
532 // DIR_USER_DATA. | |
533 FilePath dict_dir_userdata = GetFallbackDictionaryDownloadDirectory(); | |
534 FilePath dictionary_file_name_usr = GetVersionedFileName(language_, | |
535 dict_dir_userdata); | |
536 | |
537 ChromeThread::PostTask( | |
538 ChromeThread::FILE, FROM_HERE, | |
539 new ReadDictionaryTask( | |
540 this, dictionary_file_name_app, dictionary_file_name_usr)); | |
541 | |
542 return hunspell_.get() != NULL; | |
543 } | |
544 | |
545 void SpellChecker::HunspellInited(Hunspell* hunspell, | |
546 file_util::MemoryMappedFile* bdict_file, | |
547 bool file_existed) { | |
548 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); | |
549 | |
550 if (file_existed) | |
551 tried_to_init_ = true; | |
552 | |
553 if (!hunspell) { | |
554 if (!file_existed) { | |
555 // File didn't exist. We need to download a dictionary. | |
556 DoDictionaryDownload(); | |
557 } | |
558 return; | |
559 } | |
560 | |
561 | |
562 bdict_file_.reset(bdict_file); | |
563 hunspell_.reset(hunspell); | |
564 // Add all the custom words we've gotten while Hunspell was loading. | |
565 while (!custom_words_.empty()) { | |
566 hunspell_->add(custom_words_.front().c_str()); | |
567 custom_words_.pop(); | |
568 } | |
569 } | |
570 | |
571 void SpellChecker::DoDictionaryDownload() { | |
572 // Download the dictionary file. | |
573 if (request_context_getter_) { | |
574 if (!tried_to_download_dictionary_file_) { | |
575 FilePath dictionary_file_name_app = GetVersionedFileName(language_, | |
576 given_dictionary_directory_); | |
577 StartDictionaryDownload(dictionary_file_name_app); | |
578 tried_to_download_dictionary_file_ = true; | |
579 } else { | |
580 // Don't try to download a dictionary more than once. | |
581 tried_to_init_ = true; | |
582 } | |
583 } else { | |
584 NOTREACHED(); | |
585 } | |
586 } | |
587 | |
588 string16 SpellChecker::GetAutoCorrectionWord(const string16& word, int tag) { | |
589 string16 autocorrect_word; | |
590 if (!auto_spell_correct_turned_on_) | |
591 return autocorrect_word; // Return the empty string. | |
592 | |
593 int word_length = static_cast<int>(word.size()); | |
594 if (word_length < 2 || word_length > kMaxAutoCorrectWordSize) | |
595 return autocorrect_word; | |
596 | |
597 char16 misspelled_word[kMaxAutoCorrectWordSize + 1]; | |
598 const char16* word_char = word.c_str(); | |
599 for (int i = 0; i <= kMaxAutoCorrectWordSize; i++) { | |
600 if (i >= word_length) | |
601 misspelled_word[i] = NULL; | |
602 else | |
603 misspelled_word[i] = word_char[i]; | |
604 } | |
605 | |
606 // Swap adjacent characters and spellcheck. | |
607 int misspelling_start, misspelling_len; | |
608 for (int i = 0; i < word_length - 1; i++) { | |
609 // Swap. | |
610 std::swap(misspelled_word[i], misspelled_word[i + 1]); | |
611 | |
612 // Check spelling. | |
613 misspelling_start = misspelling_len = 0; | |
614 SpellCheckWord(misspelled_word, word_length, tag, &misspelling_start, | |
615 &misspelling_len, NULL); | |
616 | |
617 // Make decision: if only one swap produced a valid word, then we want to | |
618 // return it. If we found two or more, we don't do autocorrection. | |
619 if (misspelling_len == 0) { | |
620 if (autocorrect_word.empty()) { | |
621 autocorrect_word.assign(misspelled_word); | |
622 } else { | |
623 autocorrect_word.clear(); | |
624 break; | |
625 } | |
626 } | |
627 | |
628 // Restore the swapped characters. | |
629 std::swap(misspelled_word[i], misspelled_word[i + 1]); | |
630 } | |
631 return autocorrect_word; | |
632 } | |
633 | |
634 void SpellChecker::EnableAutoSpellCorrect(bool turn_on) { | |
635 auto_spell_correct_turned_on_ = turn_on; | |
636 } | |
637 | |
638 // Returns whether or not the given string is a valid contraction. | |
639 // This function is a fall-back when the SpellcheckWordIterator class | |
640 // returns a concatenated word which is not in the selected dictionary | |
641 // (e.g. "in'n'out") but each word is valid. | |
642 bool SpellChecker::IsValidContraction(const string16& contraction, int tag) { | |
643 SpellcheckWordIterator word_iterator; | |
644 word_iterator.Initialize(&character_attributes_, contraction.c_str(), | |
645 contraction.length(), false); | |
646 | |
647 string16 word; | |
648 int word_start; | |
649 int word_length; | |
650 while (word_iterator.GetNextWord(&word, &word_start, &word_length)) { | |
651 if (!CheckSpelling(word, tag)) | |
652 return false; | |
653 } | |
654 return true; | |
655 } | |
656 | |
657 bool SpellChecker::SpellCheckWord( | |
658 const char16* in_word, | |
659 int in_word_len, | |
660 int tag, | |
661 int* misspelling_start, | |
662 int* misspelling_len, | |
663 std::vector<string16>* optional_suggestions) { | |
664 DCHECK(in_word_len >= 0); | |
665 DCHECK(misspelling_start && misspelling_len) << "Out vars must be given."; | |
666 | |
667 // This must always be called on the same thread (normally the I/O thread). | |
668 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); | |
669 | |
670 // Check if the platform spellchecker is being used. | |
671 if (!is_using_platform_spelling_engine_) { | |
672 // If it isn't, try and init hunspell. | |
673 Initialize(); | |
674 | |
675 // Check to see if hunspell was successfuly initialized. | |
676 if (!hunspell_.get()) | |
677 return true; // Unable to spellcheck, return word is OK. | |
678 } | |
679 | |
680 StatsScope<StatsRate> timer(chrome::Counters::spellcheck_lookup()); | |
681 | |
682 *misspelling_start = 0; | |
683 *misspelling_len = 0; | |
684 if (in_word_len == 0) | |
685 return true; // No input means always spelled correctly. | |
686 | |
687 SpellcheckWordIterator word_iterator; | |
688 string16 word; | |
689 int word_start; | |
690 int word_length; | |
691 word_iterator.Initialize(&character_attributes_, in_word, in_word_len, true); | |
692 while (word_iterator.GetNextWord(&word, &word_start, &word_length)) { | |
693 // Found a word (or a contraction) that the spellchecker can check the | |
694 // spelling of. | |
695 bool word_ok = CheckSpelling(word, tag); | |
696 if (word_ok) | |
697 continue; | |
698 | |
699 // If the given word is a concatenated word of two or more valid words | |
700 // (e.g. "hello:hello"), we should treat it as a valid word. | |
701 if (IsValidContraction(word, tag)) | |
702 continue; | |
703 | |
704 *misspelling_start = word_start; | |
705 *misspelling_len = word_length; | |
706 | |
707 // Get the list of suggested words. | |
708 if (optional_suggestions) | |
709 FillSuggestionList(word, optional_suggestions); | |
710 return false; | |
711 } | |
712 | |
713 return true; | |
714 } | |
715 | |
716 // This task is called in the file loop to write the new word to the custom | |
717 // dictionary in disc. | |
718 class AddWordToCustomDictionaryTask : public Task { | |
719 public: | |
720 AddWordToCustomDictionaryTask(const FilePath& file_name, | |
721 const string16& word) | |
722 : file_name_(file_name), | |
723 word_(UTF16ToUTF8(word)) { | |
724 } | |
725 | |
726 private: | |
727 void Run(); | |
728 | |
729 FilePath file_name_; | |
730 std::string word_; | |
731 }; | |
732 | |
733 void AddWordToCustomDictionaryTask::Run() { | |
734 // Add the word with a new line. Note that, although this would mean an | |
735 // extra line after the list of words, this is potentially harmless and | |
736 // faster, compared to verifying everytime whether to append a new line | |
737 // or not. | |
738 word_ += "\n"; | |
739 FILE* f = file_util::OpenFile(file_name_, "a+"); | |
740 if (f != NULL) | |
741 fputs(word_.c_str(), f); | |
742 file_util::CloseFile(f); | |
743 } | |
744 | |
745 void SpellChecker::AddWord(const string16& word) { | |
746 if (is_using_platform_spelling_engine_) { | |
747 SpellCheckerPlatform::AddWord(word); | |
748 return; | |
749 } | |
750 | |
751 // Check if the |hunspell_| has been initialized at all. | |
752 Initialize(); | |
753 | |
754 // Add the word to hunspell. | |
755 std::string word_to_add = UTF16ToUTF8(word); | |
756 // Don't attempt to add an empty word, or one larger than Hunspell can handle | |
757 if (!word_to_add.empty() && word_to_add.length() < MAXWORDLEN) { | |
758 // Either add the word to |hunspell_|, or, if |hunspell_| is still loading, | |
759 // defer it till after the load completes. | |
760 if (hunspell_.get()) | |
761 hunspell_->add(word_to_add.c_str()); | |
762 else | |
763 custom_words_.push(word_to_add); | |
764 } | |
765 | |
766 // Now add the word to the custom dictionary file. | |
767 ChromeThread::PostTask( | |
768 ChromeThread::FILE, FROM_HERE, | |
769 new AddWordToCustomDictionaryTask(custom_dictionary_file_name_, word)); | |
770 } | |
771 | |
772 bool SpellChecker::CheckSpelling(const string16& word_to_check, int tag) { | |
773 bool word_correct = false; | |
774 | |
775 TimeTicks begin_time = TimeTicks::Now(); | |
776 if (is_using_platform_spelling_engine_) { | |
777 word_correct = SpellCheckerPlatform::CheckSpelling(word_to_check, tag); | |
778 } else { | |
779 std::string word_to_check_utf8(UTF16ToUTF8(word_to_check)); | |
780 // Hunspell shouldn't let us exceed its max, but check just in case | |
781 if (word_to_check_utf8.length() < MAXWORDLEN) { | |
782 // |hunspell_->spell| returns 0 if the word is spelled correctly and | |
783 // non-zero otherwsie. | |
784 word_correct = (hunspell_->spell(word_to_check_utf8.c_str()) != 0); | |
785 } | |
786 } | |
787 DHISTOGRAM_TIMES("Spellcheck.CheckTime", TimeTicks::Now() - begin_time); | |
788 | |
789 return word_correct; | |
790 } | |
791 | |
792 void SpellChecker::FillSuggestionList( | |
793 const string16& wrong_word, | |
794 std::vector<string16>* optional_suggestions) { | |
795 if (is_using_platform_spelling_engine_) { | |
796 SpellCheckerPlatform::FillSuggestionList(wrong_word, optional_suggestions); | |
797 return; | |
798 } | |
799 char** suggestions; | |
800 TimeTicks begin_time = TimeTicks::Now(); | |
801 int number_of_suggestions = hunspell_->suggest(&suggestions, | |
802 UTF16ToUTF8(wrong_word).c_str()); | |
803 DHISTOGRAM_TIMES("Spellcheck.SuggestTime", | |
804 TimeTicks::Now() - begin_time); | |
805 | |
806 // Populate the vector of WideStrings. | |
807 for (int i = 0; i < number_of_suggestions; i++) { | |
808 if (i < kMaxSuggestions) | |
809 optional_suggestions->push_back(UTF8ToUTF16(suggestions[i])); | |
810 free(suggestions[i]); | |
811 } | |
812 if (suggestions != NULL) | |
813 free(suggestions); | |
814 } | |
OLD | NEW |