Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Implements a custom word iterator used for our spellchecker. | 5 // Implements a custom word iterator used for our spellchecker. |
| 6 | 6 |
| 7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" | 7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" |
| 8 | 8 |
| 9 #include <map> | 9 #include <map> |
| 10 #include <string> | 10 #include <string> |
| 11 | 11 |
| 12 #include "base/basictypes.h" | 12 #include "base/basictypes.h" |
| 13 #include "base/i18n/break_iterator.h" | 13 #include "base/i18n/break_iterator.h" |
| 14 #include "base/logging.h" | 14 #include "base/logging.h" |
| 15 #include "base/strings/stringprintf.h" | 15 #include "base/strings/stringprintf.h" |
| 16 #include "base/strings/utf_string_conversions.h" | 16 #include "base/strings/utf_string_conversions.h" |
| 17 #include "chrome/renderer/spellchecker/spellcheck.h" | 17 #include "chrome/renderer/spellchecker/spellcheck.h" |
| 18 #include "third_party/icu/source/common/unicode/normlzr.h" | 18 #include "third_party/icu/source/common/unicode/normlzr.h" |
| 19 #include "third_party/icu/source/common/unicode/schriter.h" | 19 #include "third_party/icu/source/common/unicode/schriter.h" |
| 20 #include "third_party/icu/source/common/unicode/uscript.h" | 20 #include "third_party/icu/source/common/unicode/uscript.h" |
| 21 #include "third_party/icu/source/i18n/unicode/ulocdata.h" | 21 #include "third_party/icu/source/i18n/unicode/ulocdata.h" |
| 22 | 22 |
| 23 using base::i18n::BreakIterator; | |
| 24 | |
| 23 // SpellcheckCharAttribute implementation: | 25 // SpellcheckCharAttribute implementation: |
| 24 | 26 |
| 25 SpellcheckCharAttribute::SpellcheckCharAttribute() | 27 SpellcheckCharAttribute::SpellcheckCharAttribute() |
| 26 : script_code_(USCRIPT_LATIN) { | 28 : script_code_(USCRIPT_LATIN) { |
| 27 } | 29 } |
| 28 | 30 |
| 29 SpellcheckCharAttribute::~SpellcheckCharAttribute() { | 31 SpellcheckCharAttribute::~SpellcheckCharAttribute() { |
| 30 } | 32 } |
| 31 | 33 |
| 32 void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) { | 34 void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) { |
| (...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 317 bool allow_contraction) { | 319 bool allow_contraction) { |
| 318 // Create a custom ICU break iterator with empty text used in this object. (We | 320 // Create a custom ICU break iterator with empty text used in this object. (We |
| 319 // allow setting text later so we can re-use this iterator.) | 321 // allow setting text later so we can re-use this iterator.) |
| 320 DCHECK(attribute); | 322 DCHECK(attribute); |
| 321 const base::string16 rule(attribute->GetRuleSet(allow_contraction)); | 323 const base::string16 rule(attribute->GetRuleSet(allow_contraction)); |
| 322 | 324 |
| 323 // If there is no rule set, the attributes were invalid. | 325 // If there is no rule set, the attributes were invalid. |
| 324 if (rule.empty()) | 326 if (rule.empty()) |
| 325 return false; | 327 return false; |
| 326 | 328 |
| 327 scoped_ptr<base::i18n::BreakIterator> iterator( | 329 scoped_ptr<BreakIterator> iterator(new BreakIterator(base::string16(), rule)); |
| 328 new base::i18n::BreakIterator(base::string16(), rule)); | |
| 329 if (!iterator->Init()) { | 330 if (!iterator->Init()) { |
| 330 // Since we're not passing in any text, the only reason this could fail | 331 // Since we're not passing in any text, the only reason this could fail |
| 331 // is if we fail to parse the rules. Since the rules are hardcoded, | 332 // is if we fail to parse the rules. Since the rules are hardcoded, |
| 332 // that would be a bug in this class. | 333 // that would be a bug in this class. |
| 333 NOTREACHED() << "failed to open iterator (broken rules)"; | 334 NOTREACHED() << "failed to open iterator (broken rules)"; |
| 334 return false; | 335 return false; |
| 335 } | 336 } |
| 336 iterator_ = iterator.Pass(); | 337 iterator_ = iterator.Pass(); |
| 337 | 338 |
| 338 // Set the character attributes so we can normalize the words extracted by | 339 // Set the character attributes so we can normalize the words extracted by |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 352 // Set the text to be split by this iterator. | 353 // Set the text to be split by this iterator. |
| 353 if (!iterator_->SetText(text, length)) { | 354 if (!iterator_->SetText(text, length)) { |
| 354 LOG(ERROR) << "failed to set text"; | 355 LOG(ERROR) << "failed to set text"; |
| 355 return false; | 356 return false; |
| 356 } | 357 } |
| 357 | 358 |
| 358 text_ = text; | 359 text_ = text; |
| 359 return true; | 360 return true; |
| 360 } | 361 } |
| 361 | 362 |
| 362 bool SpellcheckWordIterator::GetNextWord(base::string16* word_string, | 363 SpellcheckWordIterator::WordIteratorStatus SpellcheckWordIterator::GetNextWord( |
| 363 int* word_start, | 364 base::string16* word_string, |
| 364 int* word_length) { | 365 int* word_start, |
| 366 int* word_length) { | |
| 365 DCHECK(!!text_); | 367 DCHECK(!!text_); |
| 366 | 368 |
| 367 word_string->clear(); | 369 word_string->clear(); |
| 368 *word_start = 0; | 370 *word_start = 0; |
| 369 *word_length = 0; | 371 *word_length = 0; |
| 370 | 372 |
| 371 if (!text_) { | 373 if (!text_) { |
| 372 return false; | 374 return IS_END_OF_TEXT; |
| 373 } | 375 } |
| 374 | 376 |
| 375 // Find a word that can be checked for spelling. Our rule sets filter out | 377 // Find a word that can be checked for spelling or a character that can be |
| 376 // invalid words (e.g. numbers and characters not supported by the | 378 // skipped over. Rather than moving past a skippable character this returns |
| 377 // spellchecker language) so this ubrk_getRuleStatus() call returns | 379 // IS_SKIPPABLE and defers handling the character to the calling function. |
| 378 // UBRK_WORD_NONE when this iterator finds an invalid word. So, we skip such | |
| 379 // words until we can find a valid word or reach the end of the input string. | |
| 380 while (iterator_->Advance()) { | 380 while (iterator_->Advance()) { |
| 381 const size_t start = iterator_->prev(); | 381 const size_t start = iterator_->prev(); |
| 382 const size_t length = iterator_->pos() - start; | 382 const size_t length = iterator_->pos() - start; |
| 383 if (iterator_->IsWord()) { | 383 BreakIterator::WordBreakStatus break_status = |
|
> **please use gerrit instead** (2015/08/13 00:13:28): inline this variable.
>
> **Julius** (2015/08/13 01:32:03): Done.

| |
| 384 if (Normalize(start, length, word_string)) { | 384 iterator_->GetWordBreakStatus(); |
| 385 switch (break_status) { | |
| 386 case BreakIterator::IS_WORD_BREAK: { | |
| 387 if (Normalize(start, length, word_string)) { | |
| 388 *word_start = start; | |
| 389 *word_length = length; | |
| 390 return IS_WORD; | |
| 391 } | |
| 392 break; | |
| 393 } | |
| 394 case BreakIterator::IS_SKIPPABLE_WORD: { | |
| 395 *word_string = iterator_->GetString(); | |
| 385 *word_start = start; | 396 *word_start = start; |
| 386 *word_length = length; | 397 *word_length = length; |
| 387 return true; | 398 return IS_SKIPPABLE; |
| 399 } | |
| 400 // \|iterator_\| is RULE_BASED so \|break_status\| should never be |
|
> **please use gerrit instead** (2015/08/13 00:13:28): If you inline |break_status|, then update the comm[ent…] (comment preview truncated in the source page)
>
> **Julius** (2015/08/13 01:32:03): Done.

| |
| 401 // IS_LINE_OR_CHAR_BREAK. | |
| 402 case BreakIterator::IS_LINE_OR_CHAR_BREAK: { | |
| 403 NOTREACHED(); | |
| 404 break; | |
| 388 } | 405 } |
| 389 } | 406 } |
| 390 } | 407 } |
| 391 | 408 |
| 392 // There aren't any more words in the given text. | 409 // There aren't any more words in the given text. |
| 393 return false; | 410 return IS_END_OF_TEXT; |
| 394 } | 411 } |
| 395 | 412 |
| 396 void SpellcheckWordIterator::Reset() { | 413 void SpellcheckWordIterator::Reset() { |
| 397 iterator_.reset(); | 414 iterator_.reset(); |
| 398 } | 415 } |
| 399 | 416 |
| 400 bool SpellcheckWordIterator::Normalize(int input_start, | 417 bool SpellcheckWordIterator::Normalize(int input_start, |
| 401 int input_length, | 418 int input_length, |
| 402 base::string16* output_string) const { | 419 base::string16* output_string) const { |
| 403 // We use NFKC (Normalization Form, Compatible decomposition, followed by | 420 // We use NFKC (Normalization Form, Compatible decomposition, followed by |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 414 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING) | 431 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING) |
| 415 return false; | 432 return false; |
| 416 | 433 |
| 417 // Copy the normalized text to the output. | 434 // Copy the normalized text to the output. |
| 418 icu::StringCharacterIterator it(output); | 435 icu::StringCharacterIterator it(output); |
| 419 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next()) | 436 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next()) |
| 420 attribute_->OutputChar(c, output_string); | 437 attribute_->OutputChar(c, output_string); |
| 421 | 438 |
| 422 return !output_string->empty(); | 439 return !output_string->empty(); |
| 423 } | 440 } |
| OLD | NEW |