OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Implements a custom word iterator used for our spellchecker. | 5 // Implements a custom word iterator used for our spellchecker. |
6 | 6 |
7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" | 7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" |
8 | 8 |
9 #include <map> | 9 #include <map> |
10 #include <string> | 10 #include <string> |
11 | 11 |
12 #include "base/basictypes.h" | 12 #include "base/basictypes.h" |
13 #include "base/i18n/break_iterator.h" | 13 #include "base/i18n/break_iterator.h" |
14 #include "base/logging.h" | 14 #include "base/logging.h" |
15 #include "base/strings/stringprintf.h" | 15 #include "base/strings/stringprintf.h" |
16 #include "base/strings/utf_string_conversions.h" | 16 #include "base/strings/utf_string_conversions.h" |
17 #include "chrome/renderer/spellchecker/spellcheck.h" | 17 #include "chrome/renderer/spellchecker/spellcheck.h" |
18 #include "third_party/icu/source/common/unicode/normlzr.h" | 18 #include "third_party/icu/source/common/unicode/normlzr.h" |
19 #include "third_party/icu/source/common/unicode/schriter.h" | 19 #include "third_party/icu/source/common/unicode/schriter.h" |
20 #include "third_party/icu/source/common/unicode/uscript.h" | 20 #include "third_party/icu/source/common/unicode/uscript.h" |
21 #include "third_party/icu/source/i18n/unicode/ulocdata.h" | 21 #include "third_party/icu/source/i18n/unicode/ulocdata.h" |
22 | 22 |
23 using base::i18n::BreakIterator; | |
24 | |
23 // SpellcheckCharAttribute implementation: | 25 // SpellcheckCharAttribute implementation: |
24 | 26 |
25 SpellcheckCharAttribute::SpellcheckCharAttribute() | 27 SpellcheckCharAttribute::SpellcheckCharAttribute() |
26 : script_code_(USCRIPT_LATIN) { | 28 : script_code_(USCRIPT_LATIN) { |
27 } | 29 } |
28 | 30 |
29 SpellcheckCharAttribute::~SpellcheckCharAttribute() { | 31 SpellcheckCharAttribute::~SpellcheckCharAttribute() { |
30 } | 32 } |
31 | 33 |
32 void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) { | 34 void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) { |
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
317 bool allow_contraction) { | 319 bool allow_contraction) { |
318 // Create a custom ICU break iterator with empty text used in this object. (We | 320 // Create a custom ICU break iterator with empty text used in this object. (We |
319 // allow setting text later so we can re-use this iterator.) | 321 // allow setting text later so we can re-use this iterator.) |
320 DCHECK(attribute); | 322 DCHECK(attribute); |
321 const base::string16 rule(attribute->GetRuleSet(allow_contraction)); | 323 const base::string16 rule(attribute->GetRuleSet(allow_contraction)); |
322 | 324 |
323 // If there is no rule set, the attributes were invalid. | 325 // If there is no rule set, the attributes were invalid. |
324 if (rule.empty()) | 326 if (rule.empty()) |
325 return false; | 327 return false; |
326 | 328 |
327 scoped_ptr<base::i18n::BreakIterator> iterator( | 329 scoped_ptr<BreakIterator> iterator(new BreakIterator(base::string16(), rule)); |
328 new base::i18n::BreakIterator(base::string16(), rule)); | |
329 if (!iterator->Init()) { | 330 if (!iterator->Init()) { |
330 // Since we're not passing in any text, the only reason this could fail | 331 // Since we're not passing in any text, the only reason this could fail |
331 // is if we fail to parse the rules. Since the rules are hardcoded, | 332 // is if we fail to parse the rules. Since the rules are hardcoded, |
332 // that would be a bug in this class. | 333 // that would be a bug in this class. |
333 NOTREACHED() << "failed to open iterator (broken rules)"; | 334 NOTREACHED() << "failed to open iterator (broken rules)"; |
334 return false; | 335 return false; |
335 } | 336 } |
336 iterator_ = iterator.Pass(); | 337 iterator_ = iterator.Pass(); |
337 | 338 |
338 // Set the character attributes so we can normalize the words extracted by | 339 // Set the character attributes so we can normalize the words extracted by |
(...skipping 13 matching lines...) Expand all Loading... | |
352 // Set the text to be split by this iterator. | 353 // Set the text to be split by this iterator. |
353 if (!iterator_->SetText(text, length)) { | 354 if (!iterator_->SetText(text, length)) { |
354 LOG(ERROR) << "failed to set text"; | 355 LOG(ERROR) << "failed to set text"; |
355 return false; | 356 return false; |
356 } | 357 } |
357 | 358 |
358 text_ = text; | 359 text_ = text; |
359 return true; | 360 return true; |
360 } | 361 } |
361 | 362 |
362 bool SpellcheckWordIterator::GetNextWord(base::string16* word_string, | 363 SpellcheckWordIterator::WordIteratorStatus SpellcheckWordIterator::GetNextWord( |
363 int* word_start, | 364 base::string16* word_string, |
364 int* word_length) { | 365 int* word_start, |
366 int* word_length) { | |
365 DCHECK(!!text_); | 367 DCHECK(!!text_); |
366 | 368 |
367 word_string->clear(); | 369 word_string->clear(); |
368 *word_start = 0; | 370 *word_start = 0; |
369 *word_length = 0; | 371 *word_length = 0; |
370 | 372 |
371 if (!text_) { | 373 if (!text_) { |
372 return false; | 374 return IS_END_OF_TEXT; |
373 } | 375 } |
374 | 376 |
375 // Find a word that can be checked for spelling. Our rule sets filter out | 377 // Find a word that can be checked for spelling or a character that can be |
376 // invalid words (e.g. numbers and characters not supported by the | 378 // skipped over. Rather than moving past a skippable character this returns |
377 // spellchecker language) so this ubrk_getRuleStatus() call returns | 379 // IS_SKIPPABLE and defers handling the character to the calling function. |
378 // UBRK_WORD_NONE when this iterator finds an invalid word. So, we skip such | |
379 // words until we can find a valid word or reach the end of the input string. | |
380 while (iterator_->Advance()) { | 380 while (iterator_->Advance()) { |
381 const size_t start = iterator_->prev(); | 381 const size_t start = iterator_->prev(); |
382 const size_t length = iterator_->pos() - start; | 382 const size_t length = iterator_->pos() - start; |
383 if (iterator_->IsWord()) { | 383 BreakIterator::WordBreakStatus break_status = |
please use gerrit instead
2015/08/13 00:13:28
inline this variable.
Julius
2015/08/13 01:32:03
Done.
| |
384 if (Normalize(start, length, word_string)) { | 384 iterator_->GetWordBreakStatus(); |
385 switch (break_status) { | |
386 case BreakIterator::IS_WORD_BREAK: { | |
387 if (Normalize(start, length, word_string)) { | |
388 *word_start = start; | |
389 *word_length = length; | |
390 return IS_WORD; | |
391 } | |
392 break; | |
393 } | |
394 case BreakIterator::IS_SKIPPABLE_WORD: { | |
395 *word_string = iterator_->GetString(); | |
385 *word_start = start; | 396 *word_start = start; |
386 *word_length = length; | 397 *word_length = length; |
387 return true; | 398 return IS_SKIPPABLE; |
399 } | |
400 // |iterator_| is RULE_BASED so |break_status| should never be | |
please use gerrit instead
2015/08/13 00:13:28
If you inline |break_status|, then update the comment.
Julius
2015/08/13 01:32:03
Done.
| |
401 // IS_LINE_OR_CHAR_BREAK. | |
402 case BreakIterator::IS_LINE_OR_CHAR_BREAK: { | |
403 NOTREACHED(); | |
404 break; | |
388 } | 405 } |
389 } | 406 } |
390 } | 407 } |
391 | 408 |
392 // There aren't any more words in the given text. | 409 // There aren't any more words in the given text. |
393 return false; | 410 return IS_END_OF_TEXT; |
394 } | 411 } |
395 | 412 |
396 void SpellcheckWordIterator::Reset() { | 413 void SpellcheckWordIterator::Reset() { |
397 iterator_.reset(); | 414 iterator_.reset(); |
398 } | 415 } |
399 | 416 |
400 bool SpellcheckWordIterator::Normalize(int input_start, | 417 bool SpellcheckWordIterator::Normalize(int input_start, |
401 int input_length, | 418 int input_length, |
402 base::string16* output_string) const { | 419 base::string16* output_string) const { |
403 // We use NFKC (Normalization Form, Compatible decomposition, followed by | 420 // We use NFKC (Normalization Form, Compatible decomposition, followed by |
(...skipping 10 matching lines...) Expand all Loading... | |
414 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING) | 431 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING) |
415 return false; | 432 return false; |
416 | 433 |
417 // Copy the normalized text to the output. | 434 // Copy the normalized text to the output. |
418 icu::StringCharacterIterator it(output); | 435 icu::StringCharacterIterator it(output); |
419 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next()) | 436 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next()) |
420 attribute_->OutputChar(c, output_string); | 437 attribute_->OutputChar(c, output_string); |
421 | 438 |
422 return !output_string->empty(); | 439 return !output_string->empty(); |
423 } | 440 } |
OLD | NEW |