Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(260)

Side by Side Diff: chrome/renderer/spellchecker/spellcheck.cc

Issue 357003: Move the spellchecker to the renderer.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: rename Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
Property Changes:
Added: svn:mergeinfo
Merged /branches/chrome_webkit_merge_branch/chrome/browser/spellchecker.cc:r69-2775
OLDNEW
1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/spellchecker.h" 5 #include "chrome/renderer/spellchecker/spellcheck.h"
6 6
7 #include "app/l10n_util.h"
8 #include "base/basictypes.h"
9 #include "base/compiler_specific.h"
10 #include "base/file_util.h" 7 #include "base/file_util.h"
11 #include "base/histogram.h" 8 #include "base/histogram.h"
12 #include "base/logging.h" 9 #include "base/time.h"
13 #include "base/path_service.h" 10 #include "chrome/renderer/render_thread.h"
14 #include "base/stats_counters.h"
15 #include "base/string_util.h"
16 #include "base/thread.h"
17 #include "chrome/browser/browser_process.h"
18 #include "chrome/browser/chrome_thread.h"
19 #include "chrome/browser/net/url_fetcher.h"
20 #include "chrome/browser/profile.h"
21 #include "chrome/browser/spellchecker_common.h"
22 #include "chrome/browser/spellchecker_platform_engine.h"
23 #include "chrome/common/chrome_constants.h"
24 #include "chrome/common/chrome_counters.h"
25 #include "chrome/common/chrome_paths.h"
26 #include "chrome/common/pref_names.h"
27 #include "chrome/common/pref_service.h"
28 #include "grit/generated_resources.h"
29 #include "grit/locale_settings.h"
30 #include "net/url_request/url_request.h"
31 #include "third_party/hunspell/src/hunspell/hunspell.hxx" 11 #include "third_party/hunspell/src/hunspell/hunspell.hxx"
32 12
13 static const int kMaxAutoCorrectWordSize = 8;
14 static const int kMaxSuggestions = 5;
15
33 using base::TimeTicks; 16 using base::TimeTicks;
34 17
35 namespace { 18 SpellCheck::SpellCheck()
36 19 : auto_spell_correct_turned_on_(false),
37 static const struct { 20 // TODO(estade): initialize this properly.
38 // The language. 21 is_using_platform_spelling_engine_(false),
39 const char* language; 22 initialized_(false) {
40 23 // Wait till we check the first word before doing any initializing.
41 // The corresponding language and region, used by the dictionaries.
42 const char* language_region;
43 } g_supported_spellchecker_languages[] = {
44 {"en-US", "en-US"},
45 {"en-GB", "en-GB"},
46 {"en-AU", "en-AU"},
47 {"fr", "fr-FR"},
48 {"it", "it-IT"},
49 {"de", "de-DE"},
50 {"es", "es-ES"},
51 {"nl", "nl-NL"},
52 {"pt-BR", "pt-BR"},
53 {"ru", "ru-RU"},
54 {"pl", "pl-PL"},
55 // {"th", "th-TH"}, // Not to be included in Spellchecker as per B=1277824
56 {"sv", "sv-SE"},
57 {"da", "da-DK"},
58 {"pt-PT", "pt-PT"},
59 {"ro", "ro-RO"},
60 // {"hu", "hu-HU"}, // Not to be included in Spellchecker as per B=1277824
61 {"he", "he-IL"},
62 {"id", "id-ID"},
63 {"cs", "cs-CZ"},
64 {"el", "el-GR"},
65 {"nb", "nb-NO"},
66 {"vi", "vi-VN"},
67 // {"bg", "bg-BG"}, // Not to be included in Spellchecker as per B=1277824
68 {"hr", "hr-HR"},
69 {"lt", "lt-LT"},
70 {"sk", "sk-SK"},
71 {"sl", "sl-SI"},
72 {"ca", "ca-ES"},
73 {"lv", "lv-LV"},
74 // {"uk", "uk-UA"}, // Not to be included in Spellchecker as per B=1277824
75 {"hi", "hi-IN"},
76 {"et", "et-EE"},
77 {"tr", "tr-TR"},
78 };
79
80 // Get the fallback folder (currently chrome::DIR_USER_DATA) where the
81 // dictionary is downloaded in case of system-wide installations.
82 FilePath GetFallbackDictionaryDownloadDirectory() {
83 FilePath dict_dir_userdata;
84 PathService::Get(chrome::DIR_USER_DATA, &dict_dir_userdata);
85 dict_dir_userdata = dict_dir_userdata.AppendASCII("Dictionaries");
86 return dict_dir_userdata;
87 } 24 }
88 25
89 bool SaveBufferToFile(const std::string& data, 26 SpellCheck::~SpellCheck() {
90 FilePath file_to_write) {
91 int num_bytes = data.length();
92 return file_util::WriteFile(file_to_write, data.data(), num_bytes) ==
93 num_bytes;
94 } 27 }
95 28
96 } // namespace 29 void SpellCheck::Init(const base::FileDescriptor& fd,
30 const std::vector<std::string>& custom_words,
31 const std::string language) {
32 initialized_ = true;
33 hunspell_.reset();
34 bdict_file_.reset();
35 fd_ = fd;
36 character_attributes_.SetDefaultLanguage(language);
97 37
98 // This is a helper class which acts as a proxy for invoking a task from the 38 custom_words_.insert(custom_words_.end(),
99 // file loop back to the IO loop. Invoking a task from file loop to the IO 39 custom_words.begin(), custom_words.end());
100 // loop directly is not safe as during browser shutdown, the IO loop tears 40
101 // down before the file loop. To avoid a crash, this object is invoked in the 41 // We delay the actual initialization of hunspell until it is needed.
102 // UI loop from the file loop, from where it gets the IO thread directly from 42 }
103 // g_browser_process and invokes the given task in the IO loop if it is not 43
104 // NULL. This object also takes ownership of the given task. 44 bool SpellCheck::SpellCheckWord(
105 class UIProxyForIOTask : public Task { 45 const char16* in_word,
106 public: 46 int in_word_len,
107 explicit UIProxyForIOTask(Task* callback_task, SpellChecker* spellchecker) 47 int tag,
108 : callback_task_(callback_task), 48 int* misspelling_start,
109 spellchecker_(spellchecker) { 49 int* misspelling_len,
50 std::vector<string16>* optional_suggestions) {
51 DCHECK(in_word_len >= 0);
52 DCHECK(misspelling_start && misspelling_len) << "Out vars must be given.";
53
54 // Do nothing if we need to delay initialization. (Rather than blocking,
55 // report the word as correctly spelled.)
56 if (InitializeIfNeeded())
57 return true;
58
59 // Do nothing if spell checking is disabled.
60 if (initialized_ && fd_.fd == -1)
61 return true;
62
63 *misspelling_start = 0;
64 *misspelling_len = 0;
65 if (in_word_len == 0)
66 return true; // No input means always spelled correctly.
67
68 SpellcheckWordIterator word_iterator;
69 string16 word;
70 int word_start;
71 int word_length;
72 word_iterator.Initialize(&character_attributes_, in_word, in_word_len, true);
73 while (word_iterator.GetNextWord(&word, &word_start, &word_length)) {
74 // Found a word (or a contraction) that the spellchecker can check the
75 // spelling of.
76 if (CheckSpelling(word, tag))
77 continue;
78
79 // If the given word is a concatenated word of two or more valid words
80 // (e.g. "hello:hello"), we should treat it as a valid word.
81 if (IsValidContraction(word, tag))
82 continue;
83
84 *misspelling_start = word_start;
85 *misspelling_len = word_length;
86
87 // Get the list of suggested words.
88 if (optional_suggestions)
89 FillSuggestionList(word, optional_suggestions);
90 return false;
110 } 91 }
111 92
112 private: 93 return true;
113 void Run();
114
115 Task* callback_task_;
116 // The SpellChecker that invoked the file loop task. May be NULL. If not
117 // NULL, then we will Release() on it if we don't run |callback_task_|. This
118 // balances any refs the spellchecker might have had outstanding which it
119 // would have Released() when |callback_task_| was run.
120 SpellChecker* spellchecker_;
121 DISALLOW_COPY_AND_ASSIGN(UIProxyForIOTask);
122 };
123
124 void UIProxyForIOTask::Run() {
125 // This has been invoked in the UI thread.
126 base::Thread* io_thread = g_browser_process->io_thread();
127 if (io_thread) { // io_thread has not been torn down yet.
128 MessageLoop* io_loop = io_thread->message_loop();
129 io_loop->PostTask(FROM_HERE, callback_task_);
130 } else {
131 if (spellchecker_)
132 spellchecker_->Release();
133 delete callback_task_;
134 }
135
136 callback_task_ = NULL;
137 } 94 }
138 95
139 // Design: The spellchecker initializes hunspell_ in the Initialize() method. 96 string16 SpellCheck::GetAutoCorrectionWord(const string16& word, int tag) {
140 // This is done using the dictionary file on disk, e.g. "en-US_1_1.bdic".
141 // Initialization of hunspell_ is held off during this process. If the
142 // dictionary is not available, we first attempt to download and save it. After
143 // the dictionary is downloaded and saved to disk (or the attempt to do so
144 // fails), corresponding flags are set
145 // in spellchecker - in the IO thread. Since IO thread goes first during closing
146 // of browser, a proxy task |UIProxyForIOTask| is created in the UI thread,
147 // which obtains the IO thread independently and invokes the task in the IO
148 // thread if it's not NULL. After the flags are cleared, a (final) attempt is
149 // made to initialize hunspell_. If it fails even then (dictionary could not
150 // download), no more attempts are made to initialize it.
151 class SaveDictionaryTask : public Task {
152 public:
153 SaveDictionaryTask(Task* on_dictionary_save_complete_callback_task,
154 const FilePath& first_attempt_file_name,
155 const FilePath& fallback_file_name,
156 const std::string& data)
157 : on_dictionary_save_complete_callback_task_(
158 on_dictionary_save_complete_callback_task),
159 first_attempt_file_name_(first_attempt_file_name),
160 fallback_file_name_(fallback_file_name),
161 data_(data) {
162 }
163
164 private:
165 void Run();
166
167 bool SaveBufferToFile(const std::string& data,
168 FilePath file_to_write) {
169 int num_bytes = data.length();
170 return file_util::WriteFile(file_to_write, data.data(), num_bytes) ==
171 num_bytes;
172 }
173
174 // factory object to invoke later back to spellchecker in io thread on
175 // download completion to change appropriate flags.
176 Task* on_dictionary_save_complete_callback_task_;
177
178 // The file which will be stored in the first attempt.
179 FilePath first_attempt_file_name_;
180
181 // The file which will be stored as a fallback.
182 FilePath fallback_file_name_;
183
184 // The buffer which has to be stored to disk.
185 std::string data_;
186
187 // This invokes back to io loop when downloading is over.
188 DISALLOW_COPY_AND_ASSIGN(SaveDictionaryTask);
189 };
190
191 void SaveDictionaryTask::Run() {
192 if (!SaveBufferToFile(data_, first_attempt_file_name_)) {
193 // Try saving it to |fallback_file_name_|, which almost surely has
194 // write permission. If even this fails, there is nothing to be done.
195 FilePath fallback_dir = fallback_file_name_.DirName();
196 // Create the directory if it does not exist.
197 if (!file_util::PathExists(fallback_dir))
198 file_util::CreateDirectory(fallback_dir);
199 SaveBufferToFile(data_, fallback_file_name_);
200 } // Unsuccessful save is taken care of in SpellChecker::Initialize().
201
202 // Set Flag that dictionary is not downloading anymore.
203 MessageLoop* ui_loop = ChromeThread::GetMessageLoop(ChromeThread::UI);
204 ui_loop->PostTask(FROM_HERE,
205 new UIProxyForIOTask(on_dictionary_save_complete_callback_task_, NULL));
206 }
207
208 // Design: this task tries to read the dictionary from disk and load it into
209 // memory. It is executed on the file thread, and posts the results back to
210 // the IO thread (via the UI thread---see UIProxyForIOTask).
211 // The task first checks for the existence of the dictionary in one of the two
212 // given locations. If it does not exist, the task informs the SpellChecker,
213 // which will try to download the directory and run a new ReadDictionaryTask.
214 class ReadDictionaryTask : public Task {
215 public:
216 ReadDictionaryTask(SpellChecker* spellchecker,
217 const FilePath& dict_file_name_app,
218 const FilePath& dict_file_name_usr)
219 : spellchecker_(spellchecker),
220 hunspell_(NULL),
221 bdict_file_(NULL),
222 custom_dictionary_file_name_(
223 spellchecker->custom_dictionary_file_name_),
224 dict_file_name_app_(dict_file_name_app),
225 dict_file_name_usr_(dict_file_name_usr) {
226 }
227
228 virtual void Run() {
229 FilePath bdict_file_path;
230 if (file_util::PathExists(dict_file_name_app_)) {
231 bdict_file_path = dict_file_name_app_;
232 } else if (file_util::PathExists(dict_file_name_usr_)) {
233 bdict_file_path = dict_file_name_usr_;
234 } else {
235 Finish(false);
236 return;
237 }
238
239 bdict_file_ = new file_util::MemoryMappedFile;
240 if (bdict_file_->Initialize(bdict_file_path)) {
241 TimeTicks start_time = TimeTicks::Now();
242
243 hunspell_ =
244 new Hunspell(bdict_file_->data(), bdict_file_->length());
245
246 // Add custom words to Hunspell.
247 std::string contents;
248 file_util::ReadFileToString(custom_dictionary_file_name_, &contents);
249 std::vector<std::string> list_of_words;
250 SplitString(contents, '\n', &list_of_words);
251 for (std::vector<std::string>::iterator it = list_of_words.begin();
252 it != list_of_words.end(); ++it) {
253 hunspell_->add(it->c_str());
254 }
255
256 DHISTOGRAM_TIMES("Spellcheck.InitTime",
257 TimeTicks::Now() - start_time);
258 } else {
259 delete bdict_file_;
260 bdict_file_ = NULL;
261 }
262
263 Finish(true);
264 }
265
266 private:
267 void Finish(bool file_existed) {
268 Task* task = NewRunnableMethod(spellchecker_, &SpellChecker::HunspellInited,
269 hunspell_, bdict_file_, file_existed);
270 if (spellchecker_->file_loop_) {
271 MessageLoop* ui_loop = ChromeThread::GetMessageLoop(ChromeThread::UI);
272 // We were called on the file loop. Post back to the IO loop.
273 // If this never gets posted to the IO loop, then we will leak |hunspell_|
274 // and |bdict_file_|. But that can only happen during shutdown, so it's
275 // not worth caring about.
276 ui_loop->PostTask(FROM_HERE, new UIProxyForIOTask(task, spellchecker_));
277 } else {
278 // We were called directly (e.g., during testing). Run the task directly.
279 task->Run();
280 delete task;
281 }
282 }
283
284 // The SpellChecker we are working for. We are guaranteed to be outlived
285 // by this object because it AddRefs() itself before calling us.
286 // Accessing it is not necessarily thread safe, but we are careful to only access
287 // it in ways that are.
288 SpellChecker* spellchecker_;
289 Hunspell* hunspell_;
290 file_util::MemoryMappedFile* bdict_file_;
291
292 FilePath custom_dictionary_file_name_;
293 FilePath dict_file_name_app_;
294 FilePath dict_file_name_usr_;
295
296 DISALLOW_COPY_AND_ASSIGN(ReadDictionaryTask);
297 };
298
299 void SpellChecker::SpellCheckLanguages(std::vector<std::string>* languages) {
300 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages);
301 ++i) {
302 languages->push_back(g_supported_spellchecker_languages[i].language);
303 }
304 }
305
306 // This function returns the language-region version of language name.
307 // e.g. returns hi-IN for hi.
308 std::string SpellChecker::GetSpellCheckLanguageRegion(
309 std::string input_language) {
310 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages);
311 ++i) {
312 std::string language(
313 g_supported_spellchecker_languages[i].language);
314 if (language == input_language)
315 return std::string(
316 g_supported_spellchecker_languages[i].language_region);
317 }
318
319 return input_language;
320 }
321
322
323 std::string SpellChecker::GetLanguageFromLanguageRegion(
324 std::string input_language) {
325 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages);
326 ++i) {
327 std::string language(
328 g_supported_spellchecker_languages[i].language_region);
329 if (language == input_language)
330 return std::string(g_supported_spellchecker_languages[i].language);
331 }
332
333 return input_language;
334 }
335
336 std::string SpellChecker::GetCorrespondingSpellCheckLanguage(
337 const std::string& language) {
338 // Look for exact match in the Spell Check language list.
339 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages);
340 ++i) {
341 // First look for exact match in the language part of the list.
342 std::string spellcheck_language(
343 g_supported_spellchecker_languages[i].language);
344 if (spellcheck_language == language)
345 return language;
346
347 // Next, look for exact match in the language_region part of the list.
348 std::string spellcheck_language_region(
349 g_supported_spellchecker_languages[i].language_region);
350 if (spellcheck_language_region == language)
351 return g_supported_spellchecker_languages[i].language;
352 }
353
354 // Look for a match by comparing only language parts. All the 'en-RR'
355 // except for 'en-GB' exactly matched in the above loop, will match
356 // 'en-US'. This is not ideal because 'en-ZA', 'en-NZ' had
357 // better be matched with 'en-GB'. This does not handle cases like
358 // 'az-Latn-AZ' vs 'az-Arab-AZ', either, but we don't use 3-part
359 // locale ids with a script code in the middle, yet.
360 // TODO(jungshik): Add a better fallback.
361 std::string language_part(language, 0, language.find('-'));
362 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(g_supported_spellchecker_languages);
363 ++i) {
364 std::string spellcheck_language(
365 g_supported_spellchecker_languages[i].language_region);
366 if (spellcheck_language.substr(0, spellcheck_language.find('-')) ==
367 language_part)
368 return spellcheck_language;
369 }
370
371 // No match found - return blank.
372 return std::string();
373 }
374
375 int SpellChecker::GetSpellCheckLanguages(
376 Profile* profile,
377 std::vector<std::string>* languages) {
378 StringPrefMember accept_languages_pref;
379 StringPrefMember dictionary_language_pref;
380 accept_languages_pref.Init(prefs::kAcceptLanguages, profile->GetPrefs(),
381 NULL);
382 dictionary_language_pref.Init(prefs::kSpellCheckDictionary,
383 profile->GetPrefs(), NULL);
384 std::string dictionary_language =
385 WideToASCII(dictionary_language_pref.GetValue());
386
387 // The current dictionary language should be there.
388 languages->push_back(dictionary_language);
389
390 // Now scan through the list of accept languages, and find possible mappings
391 // from this list to the existing list of spell check languages.
392 std::vector<std::string> accept_languages;
393
394 if (SpellCheckerPlatform::SpellCheckerAvailable()) {
395 SpellCheckerPlatform::GetAvailableLanguages(&accept_languages);
396 } else {
397 SplitString(WideToASCII(accept_languages_pref.GetValue()), ',',
398 &accept_languages);
399 }
400 for (std::vector<std::string>::const_iterator i = accept_languages.begin();
401 i != accept_languages.end(); ++i) {
402 std::string language = GetCorrespondingSpellCheckLanguage(*i);
403 if (!language.empty() &&
404 std::find(languages->begin(), languages->end(), language) ==
405 languages->end())
406 languages->push_back(language);
407 }
408
409 for (size_t i = 0; i < languages->size(); ++i) {
410 if ((*languages)[i] == dictionary_language)
411 return i;
412 }
413 return -1;
414 }
415
416 FilePath SpellChecker::GetVersionedFileName(const std::string& input_language,
417 const FilePath& dict_dir) {
418 // The default dictionary version is 1-2. These versions have been augmented
419 // with additional words found by the translation team.
420 static const char kDefaultVersionString[] = "-1-2";
421
422 // The following dictionaries have either not been augmented with additional
423 // words (version 1-1) or have new words, as well as an upgraded dictionary
424 // as of Feb 2009 (version 1-3).
425 static const struct {
426 // The language input.
427 const char* language;
428
429 // The corresponding version.
430 const char* version;
431 } special_version_string[] = {
432 {"en-AU", "-1-1"},
433 {"en-GB", "-1-1"},
434 {"es-ES", "-1-1"},
435 {"nl-NL", "-1-1"},
436 {"ru-RU", "-1-1"},
437 {"sv-SE", "-1-1"},
438 {"he-IL", "-1-1"},
439 {"el-GR", "-1-1"},
440 {"hi-IN", "-1-1"},
441 {"tr-TR", "-1-1"},
442 {"et-EE", "-1-1"},
443 {"fr-FR", "-1-4"}, // to fix crash, fr dictionary was updated to 1.4
444 {"lt-LT", "-1-3"},
445 {"pl-PL", "-1-3"}
446 };
447
448 // Generate the bdict file name using default version string or special
449 // version string, depending on the language.
450 std::string language = GetSpellCheckLanguageRegion(input_language);
451 std::string versioned_bdict_file_name(language + kDefaultVersionString +
452 ".bdic");
453 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(special_version_string); ++i) {
454 if (language == special_version_string[i].language) {
455 versioned_bdict_file_name =
456 language + special_version_string[i].version + ".bdic";
457 break;
458 }
459 }
460
461 return dict_dir.AppendASCII(versioned_bdict_file_name);
462 }
463
464 SpellChecker::SpellChecker(const FilePath& dict_dir,
465 const std::string& language,
466 URLRequestContextGetter* request_context_getter,
467 const FilePath& custom_dictionary_file_name)
468 : given_dictionary_directory_(dict_dir),
469 custom_dictionary_file_name_(custom_dictionary_file_name),
470 tried_to_init_(false),
471 language_(language),
472 worker_loop_(NULL),
473 tried_to_download_dictionary_file_(false),
474 file_loop_(NULL),
475 request_context_getter_(request_context_getter),
476 obtaining_dictionary_(false),
477 auto_spell_correct_turned_on_(false),
478 is_using_platform_spelling_engine_(false),
479 fetcher_(NULL),
480 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) {
481 if (SpellCheckerPlatform::SpellCheckerAvailable()) {
482 SpellCheckerPlatform::Init();
483 if (SpellCheckerPlatform::PlatformSupportsLanguage(language)) {
484 // If we have reached here, then we know that the current platform
485 // supports the given language and we will use it instead of hunspell.
486 SpellCheckerPlatform::SetLanguage(language);
487 is_using_platform_spelling_engine_ = true;
488 }
489 }
490
491 // Get the corresponding BDIC file name.
492 bdic_file_name_ = GetVersionedFileName(language, dict_dir).BaseName();
493
494 // Get File Loop - hunspell gets initialized here.
495 base::Thread* file_thread = g_browser_process->file_thread();
496 if (file_thread)
497 file_loop_ = file_thread->message_loop();
498
499 // Get the path to the custom dictionary file.
500 if (custom_dictionary_file_name_.empty()) {
501 FilePath personal_file_directory;
502 PathService::Get(chrome::DIR_USER_DATA, &personal_file_directory);
503 custom_dictionary_file_name_ =
504 personal_file_directory.Append(chrome::kCustomDictionaryFileName);
505 }
506
507 // Use this dictionary language as the default one of the
508 // SpellcheckCharAttribute object.
509 character_attributes_.SetDefaultLanguage(language);
510 }
511
512 SpellChecker::~SpellChecker() {
513 // This must be deleted on the I/O thread (see the header). This is the same
514 // thread that SpellCheckWord is called on, so we verify that they were all
515 // the same thread.
516 if (worker_loop_)
517 DCHECK(MessageLoop::current() == worker_loop_);
518 }
519
520 void SpellChecker::StartDictionaryDownload(const FilePath& file_name) {
521 // Determine URL of file to download.
522 static const char kDownloadServerUrl[] =
523 "http://cache.pack.google.com/edgedl/chrome/dict/";
524 GURL url = GURL(std::string(kDownloadServerUrl) + WideToUTF8(
525 l10n_util::ToLower(bdic_file_name_.ToWStringHack())));
526 fetcher_.reset(new URLFetcher(url, URLFetcher::GET, this));
527 fetcher_->set_request_context(request_context_getter_);
528 obtaining_dictionary_ = true;
529 fetcher_->Start();
530 }
531
532 void SpellChecker::OnURLFetchComplete(const URLFetcher* source,
533 const GURL& url,
534 const URLRequestStatus& status,
535 int response_code,
536 const ResponseCookies& cookies,
537 const std::string& data) {
538 DCHECK(source);
539 if ((response_code / 100) != 2) {
540 obtaining_dictionary_ = false;
541 return;
542 }
543
544 // Basic sanity check on the dictionary.
545 // There's the small chance that we might see a 200 status code for a body
546 // that represents some form of failure.
547 if (data.size() < 4 || data[0] != 'B' || data[1] != 'D' || data[2] != 'i' ||
548 data[3] != 'c') {
549 obtaining_dictionary_ = false;
550 return;
551 }
552
553 // Save the file in the file thread, and not here, the IO thread.
554 FilePath first_attempt_file_name = given_dictionary_directory_.Append(
555 bdic_file_name_);
556 FilePath user_data_dir = GetFallbackDictionaryDownloadDirectory();
557 FilePath fallback_file_name = user_data_dir.Append(bdic_file_name_);
558 Task* dic_task = method_factory_.
559 NewRunnableMethod(&SpellChecker::OnDictionarySaveComplete);
560 file_loop_->PostTask(FROM_HERE, new SaveDictionaryTask(dic_task,
561 first_attempt_file_name, fallback_file_name, data));
562 }
563
564 void SpellChecker::OnDictionarySaveComplete() {
565 obtaining_dictionary_ = false;
566 // Now that the dictionary is downloaded, continue trying to initialize.
567 Initialize();
568 }
569
570 // Initialize SpellChecker. In this method, if the dictionary is not present
571 // in the local disk, it is fetched asynchronously.
572 bool SpellChecker::Initialize() {
573 if (!worker_loop_)
574 worker_loop_ = MessageLoop::current();
575 else
576 DCHECK(worker_loop_ == MessageLoop::current());
577
578 // Return false if the dictionary files are downloading.
579 if (obtaining_dictionary_)
580 return false;
581
582 // Return false if tried to init and failed - don't try multiple times in
583 // this session.
584 if (tried_to_init_)
585 return hunspell_.get() != NULL;
586
587 StatsScope<StatsCounterTimer> timer(chrome::Counters::spellcheck_init());
588
589 // The default place where the spellcheck dictionary can reside is
590 // chrome::DIR_APP_DICTIONARIES. However, for systemwide installations,
591 // this directory may not have permissions for download. In that case, the
592 // alternate directory for download is chrome::DIR_USER_DATA. We have to check
593 // for the spellcheck dictionaries in both the directories. If not found in
594 // either one, it has to be downloaded in either of the two.
595 // TODO(sidchat): Some sort of UI to warn users that spellchecker is not
596 // working at all (due to failed dictionary download)?
597
598 // File name for downloading in DIR_APP_DICTIONARIES.
599 FilePath dictionary_file_name_app = GetVersionedFileName(language_,
600 given_dictionary_directory_);
601
602 // Filename for downloading in the fallback dictionary download directory,
603 // DIR_USER_DATA.
604 FilePath dict_dir_userdata = GetFallbackDictionaryDownloadDirectory();
605 FilePath dictionary_file_name_usr = GetVersionedFileName(language_,
606 dict_dir_userdata);
607
608 // Balances Release() in HunspellInited(), or in UIProxyForIOTask if the IO
609 // thread is torn down before the ReadDictionaryTask calls us back.
610 AddRef();
611 Task* task = new ReadDictionaryTask(this,
612 dictionary_file_name_app, dictionary_file_name_usr);
613 if (file_loop_) {
614 file_loop_->PostTask(FROM_HERE, task);
615 } else {
616 task->Run();
617 delete task;
618 }
619
620 return hunspell_.get() != NULL;
621 }
622
623 void SpellChecker::HunspellInited(Hunspell* hunspell,
624 file_util::MemoryMappedFile* bdict_file,
625 bool file_existed) {
626 DCHECK(worker_loop_ == MessageLoop::current());
627
628 if (file_existed)
629 tried_to_init_ = true;
630
631 if (!hunspell) {
632 if (!file_existed) {
633 // File didn't exist. We need to download a dictionary.
634 DoDictionaryDownload();
635 }
636
637 // Balances AddRef() in Initialize().
638 Release();
639 return;
640 }
641
642
643 bdict_file_.reset(bdict_file);
644 hunspell_.reset(hunspell);
645 // Add all the custom words we've gotten while Hunspell was loading.
646 while (!custom_words_.empty()) {
647 hunspell_->add(custom_words_.front().c_str());
648 custom_words_.pop();
649 }
650
651 // Balances AddRef() in Initialize().
652 Release();
653 }
654
655 void SpellChecker::DoDictionaryDownload() {
656 // Download the dictionary file.
657 if (file_loop_ && request_context_getter_) {
658 if (!tried_to_download_dictionary_file_) {
659 FilePath dictionary_file_name_app = GetVersionedFileName(language_,
660 given_dictionary_directory_);
661 StartDictionaryDownload(dictionary_file_name_app);
662 tried_to_download_dictionary_file_ = true;
663 } else {
664 // Don't try to download a dictionary more than once.
665 tried_to_init_ = true;
666 }
667 } else {
668 NOTREACHED();
669 }
670 }
671
672 string16 SpellChecker::GetAutoCorrectionWord(const string16& word, int tag) {
673 string16 autocorrect_word; 97 string16 autocorrect_word;
674 if (!auto_spell_correct_turned_on_) 98 if (!auto_spell_correct_turned_on_)
675 return autocorrect_word; // Return the empty string. 99 return autocorrect_word; // Return the empty string.
676 100
677 int word_length = static_cast<int>(word.size()); 101 int word_length = static_cast<int>(word.size());
678 if (word_length < 2 || word_length > kMaxAutoCorrectWordSize) 102 if (word_length < 2 || word_length > kMaxAutoCorrectWordSize)
679 return autocorrect_word; 103 return autocorrect_word;
680 104
105 if (InitializeIfNeeded())
106 return autocorrect_word;
107
681 char16 misspelled_word[kMaxAutoCorrectWordSize + 1]; 108 char16 misspelled_word[kMaxAutoCorrectWordSize + 1];
682 const char16* word_char = word.c_str(); 109 const char16* word_char = word.c_str();
683 for (int i = 0; i <= kMaxAutoCorrectWordSize; i++) { 110 for (int i = 0; i <= kMaxAutoCorrectWordSize; i++) {
684 if (i >= word_length) 111 if (i >= word_length)
685 misspelled_word[i] = NULL; 112 misspelled_word[i] = NULL;
686 else 113 else
687 misspelled_word[i] = word_char[i]; 114 misspelled_word[i] = word_char[i];
688 } 115 }
689 116
690 // Swap adjacent characters and spellcheck. 117 // Swap adjacent characters and spellcheck.
(...skipping 17 matching lines...) Expand all
708 break; 135 break;
709 } 136 }
710 } 137 }
711 138
712 // Restore the swapped characters. 139 // Restore the swapped characters.
713 std::swap(misspelled_word[i], misspelled_word[i + 1]); 140 std::swap(misspelled_word[i], misspelled_word[i + 1]);
714 } 141 }
715 return autocorrect_word; 142 return autocorrect_word;
716 } 143 }
717 144
718 void SpellChecker::EnableAutoSpellCorrect(bool turn_on) { 145 void SpellCheck::EnableAutoSpellCorrect(bool turn_on) {
719 auto_spell_correct_turned_on_ = turn_on; 146 auto_spell_correct_turned_on_ = turn_on;
720 } 147 }
721 148
722 // Returns whether or not the given string is a valid contraction. 149 void SpellCheck::WordAdded(const std::string& word) {
723 // This function is a fall-back when the SpellcheckWordIterator class 150 if (is_using_platform_spelling_engine_)
724 // returns a concatenated word which is not in the selected dictionary 151 return;
725 // (e.g. "in'n'out") but each word is valid.
726 bool SpellChecker::IsValidContraction(const string16& contraction, int tag) {
727 SpellcheckWordIterator word_iterator;
728 word_iterator.Initialize(&character_attributes_, contraction.c_str(),
729 contraction.length(), false);
730 152
731 string16 word; 153 if (!hunspell_.get()) {
732 int word_start; 154 // Save it for later---add it when hunspell is initialized.
733 int word_length; 155 custom_words_.push_back(word);
734 while (word_iterator.GetNextWord(&word, &word_start, &word_length)) {
735 if (!CheckSpelling(word, tag))
736 return false;
737 }
738 return true;
739 }
740
741 bool SpellChecker::SpellCheckWord(
742 const char16* in_word,
743 int in_word_len,
744 int tag,
745 int* misspelling_start,
746 int* misspelling_len,
747 std::vector<string16>* optional_suggestions) {
748 DCHECK(in_word_len >= 0);
749 DCHECK(misspelling_start && misspelling_len) << "Out vars must be given.";
750
751 // This must always be called on the same thread (normally the I/O thread).
752 if (worker_loop_)
753 DCHECK(MessageLoop::current() == worker_loop_);
754
755 // Check if the platform spellchecker is being used.
756 if (!is_using_platform_spelling_engine_) {
757 // If it isn't, try and init hunspell.
758 Initialize();
759
760 // Check to see if hunspell was successfully initialized.
761 if (!hunspell_.get())
762 return true; // Unable to spellcheck, return word is OK.
763 }
764
765 StatsScope<StatsRate> timer(chrome::Counters::spellcheck_lookup());
766
767 *misspelling_start = 0;
768 *misspelling_len = 0;
769 if (in_word_len == 0)
770 return true; // No input means always spelled correctly.
771
772 SpellcheckWordIterator word_iterator;
773 string16 word;
774 int word_start;
775 int word_length;
776 word_iterator.Initialize(&character_attributes_, in_word, in_word_len, true);
777 while (word_iterator.GetNextWord(&word, &word_start, &word_length)) {
778 // Found a word (or a contraction) that the spellchecker can check the
779 // spelling of.
780 bool word_ok = CheckSpelling(word, tag);
781 if (word_ok)
782 continue;
783
784 // If the given word is a concatenated word of two or more valid words
785 // (e.g. "hello:hello"), we should treat it as a valid word.
786 if (IsValidContraction(word, tag))
787 continue;
788
789 *misspelling_start = word_start;
790 *misspelling_len = word_length;
791
792 // Get the list of suggested words.
793 if (optional_suggestions)
794 FillSuggestionList(word, optional_suggestions);
795 return false;
796 }
797
798 return true;
799 }
800
801 // This task is called in the file loop to write the new word to the custom
802 // dictionary on disk.
803 class AddWordToCustomDictionaryTask : public Task {
804 public:
805 AddWordToCustomDictionaryTask(const FilePath& file_name,
806 const string16& word)
807 : file_name_(file_name),
808 word_(UTF16ToUTF8(word)) {
809 }
810
811 private:
812 void Run();
813
814 FilePath file_name_;
815 std::string word_;
816 };
817
818 void AddWordToCustomDictionaryTask::Run() {
819 // Add the word with a new line. Note that, although this would mean an
820 // extra line after the list of words, this is potentially harmless and
821 // faster, compared to verifying every time whether to append a new line
822 // or not.
823 word_ += "\n";
824 FILE* f = file_util::OpenFile(file_name_, "a+");
825 if (f != NULL)
826 fputs(word_.c_str(), f);
827 file_util::CloseFile(f);
828 }
829
830 void SpellChecker::AddWord(const string16& word) {
831 if (is_using_platform_spelling_engine_) {
832 SpellCheckerPlatform::AddWord(word);
833 return;
834 }
835
836 // Check if the |hunspell_| has been initialized at all.
837 Initialize();
838
839 // Add the word to hunspell.
840 std::string word_to_add = UTF16ToUTF8(word);
841 // Don't attempt to add an empty word, or one larger than Hunspell can handle
842 if (!word_to_add.empty() && word_to_add.length() < MAXWORDUTF8LEN) {
843 // Either add the word to |hunspell_|, or, if |hunspell_| is still loading,
844 // defer it till after the load completes.
845 if (hunspell_.get())
846 hunspell_->add(word_to_add.c_str());
847 else
848 custom_words_.push(word_to_add);
849 }
850
851 // Now add the word to the custom dictionary file.
852 Task* write_word_task =
853 new AddWordToCustomDictionaryTask(custom_dictionary_file_name_, word);
854 if (file_loop_) {
855 file_loop_->PostTask(FROM_HERE, write_word_task);
856 } else { 156 } else {
857 write_word_task->Run(); 157 AddWordToHunspell(word);
858 delete write_word_task;
859 } 158 }
860 } 159 }
861 160
862 bool SpellChecker::CheckSpelling(const string16& word_to_check, int tag) { 161 void SpellCheck::InitializeHunspell() {
162 if (hunspell_.get())
163 return;
164
165 bdict_file_.reset(new file_util::MemoryMappedFile);
166
167 if (bdict_file_->Initialize(fd_)) {
168 TimeTicks start_time = TimeTicks::Now();
169
170 hunspell_.reset(
171 new Hunspell(bdict_file_->data(), bdict_file_->length()));
172
173 // Add custom words to Hunspell.
174 for (std::vector<std::string>::iterator it = custom_words_.begin();
175 it != custom_words_.end(); ++it) {
176 AddWordToHunspell(*it);
177 }
178
179 DHISTOGRAM_TIMES("Spellcheck.InitTime",
180 TimeTicks::Now() - start_time);
181 }
182 }
183
184 void SpellCheck::AddWordToHunspell(const std::string& word) {
185 if (!word.empty() && word.length() < MAXWORDUTF8LEN)
186 hunspell_->add(word.c_str());
187 }
188
189 bool SpellCheck::InitializeIfNeeded() {
190 if (!initialized_) {
191 RenderThread::current()->RequestSpellCheckDictionary();
192 initialized_ = true;
193 return true;
194 }
195
196 // Check if the platform spellchecker is being used.
197 if (!is_using_platform_spelling_engine_ && fd_.fd != -1) {
198 // If it isn't, init hunspell.
199 InitializeHunspell();
200 }
201
202 return false;
203 }
204
205 // When called, relays the request to check the spelling to the proper
206 // backend, either hunspell or a platform-specific backend.
207 bool SpellCheck::CheckSpelling(const string16& word_to_check, int tag) {
863 bool word_correct = false; 208 bool word_correct = false;
864 209
865 TimeTicks begin_time = TimeTicks::Now();
866 if (is_using_platform_spelling_engine_) { 210 if (is_using_platform_spelling_engine_) {
867 word_correct = SpellCheckerPlatform::CheckSpelling(word_to_check, tag); 211 // TODO(estade): sync IPC to browser.
212 word_correct = true;
868 } else { 213 } else {
869 std::string word_to_check_utf8(UTF16ToUTF8(word_to_check)); 214 std::string word_to_check_utf8(UTF16ToUTF8(word_to_check));
870 // Hunspell shouldn't let us exceed its max, but check just in case 215 // Hunspell shouldn't let us exceed its max, but check just in case
871 if (word_to_check_utf8.length() < MAXWORDUTF8LEN) { 216 if (word_to_check_utf8.length() < MAXWORDUTF8LEN) {
872 // |hunspell_->spell| returns 0 if the word is misspelled and 217 // |hunspell_->spell| returns 0 if the word is misspelled and
873 // non-zero otherwise. 218 // non-zero otherwise.
874 word_correct = (hunspell_->spell(word_to_check_utf8.c_str()) != 0); 219 word_correct = (hunspell_->spell(word_to_check_utf8.c_str()) != 0);
875 } 220 }
876 } 221 }
877 DHISTOGRAM_TIMES("Spellcheck.CheckTime", TimeTicks::Now() - begin_time);
878 222
879 return word_correct; 223 return word_correct;
880 } 224 }
881 225
882 void SpellChecker::FillSuggestionList( 226 void SpellCheck::FillSuggestionList(
883 const string16& wrong_word, 227 const string16& wrong_word,
884 std::vector<string16>* optional_suggestions) { 228 std::vector<string16>* optional_suggestions) {
885 if (is_using_platform_spelling_engine_) { 229 if (is_using_platform_spelling_engine_) {
886 SpellCheckerPlatform::FillSuggestionList(wrong_word, optional_suggestions); 230 // TODO(estade): sync IPC to browser.
887 return; 231 return;
888 } 232 }
889 char** suggestions; 233 char** suggestions;
890 TimeTicks begin_time = TimeTicks::Now(); 234 int number_of_suggestions =
891 int number_of_suggestions = hunspell_->suggest(&suggestions, 235 hunspell_->suggest(&suggestions, UTF16ToUTF8(wrong_word).c_str());
892 UTF16ToUTF8(wrong_word).c_str());
893 DHISTOGRAM_TIMES("Spellcheck.SuggestTime",
894 TimeTicks::Now() - begin_time);
895 236
896 // Populate the vector of WideStrings. 237 // Populate the vector of WideStrings.
897 for (int i = 0; i < number_of_suggestions; i++) { 238 for (int i = 0; i < number_of_suggestions; i++) {
898 if (i < kMaxSuggestions) 239 if (i < kMaxSuggestions)
899 optional_suggestions->push_back(UTF8ToUTF16(suggestions[i])); 240 optional_suggestions->push_back(UTF8ToUTF16(suggestions[i]));
900 free(suggestions[i]); 241 free(suggestions[i]);
901 } 242 }
902 if (suggestions != NULL) 243 if (suggestions != NULL)
903 free(suggestions); 244 free(suggestions);
904 } 245 }
246
247 // Returns whether or not the given string is a valid contraction.
248 // This function is a fall-back when the SpellcheckWordIterator class
249 // returns a concatenated word which is not in the selected dictionary
250 // (e.g. "in'n'out") but each word is valid.
251 bool SpellCheck::IsValidContraction(const string16& contraction, int tag) {
252 SpellcheckWordIterator word_iterator;
253 word_iterator.Initialize(&character_attributes_, contraction.c_str(),
254 contraction.length(), false);
255
256 string16 word;
257 int word_start;
258 int word_length;
259 while (word_iterator.GetNextWord(&word, &word_start, &word_length)) {
260 if (!CheckSpelling(word, tag))
261 return false;
262 }
263 return true;
264 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698