OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Implements a custom word iterator used for our spellchecker. | 5 // Implements a custom word iterator used for our spellchecker. |
6 | 6 |
7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" | 7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" |
8 | 8 |
9 #include <map> | 9 #include <map> |
10 #include <string> | 10 #include <string> |
11 | 11 |
12 #include "base/basictypes.h" | 12 #include "base/basictypes.h" |
13 #include "base/i18n/break_iterator.h" | 13 #include "base/i18n/break_iterator.h" |
14 #include "base/logging.h" | 14 #include "base/logging.h" |
15 #include "base/strings/stringprintf.h" | 15 #include "base/strings/stringprintf.h" |
16 #include "base/strings/utf_string_conversions.h" | 16 #include "base/strings/utf_string_conversions.h" |
17 #include "chrome/renderer/spellchecker/spellcheck.h" | 17 #include "chrome/renderer/spellchecker/spellcheck.h" |
18 #include "third_party/icu/source/common/unicode/normlzr.h" | 18 #include "third_party/icu/source/common/unicode/normlzr.h" |
19 #include "third_party/icu/source/common/unicode/schriter.h" | 19 #include "third_party/icu/source/common/unicode/schriter.h" |
20 #include "third_party/icu/source/common/unicode/uscript.h" | 20 #include "third_party/icu/source/common/unicode/uscript.h" |
21 #include "third_party/icu/source/i18n/unicode/ulocdata.h" | 21 #include "third_party/icu/source/i18n/unicode/ulocdata.h" |
22 | 22 |
| 23 using base::i18n::BreakIterator; |
| 24 |
23 // SpellcheckCharAttribute implementation: | 25 // SpellcheckCharAttribute implementation: |
24 | 26 |
25 SpellcheckCharAttribute::SpellcheckCharAttribute() | 27 SpellcheckCharAttribute::SpellcheckCharAttribute() |
26 : script_code_(USCRIPT_LATIN) { | 28 : script_code_(USCRIPT_LATIN) { |
27 } | 29 } |
28 | 30 |
29 SpellcheckCharAttribute::~SpellcheckCharAttribute() { | 31 SpellcheckCharAttribute::~SpellcheckCharAttribute() { |
30 } | 32 } |
31 | 33 |
32 void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) { | 34 void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) { |
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
317 bool allow_contraction) { | 319 bool allow_contraction) { |
318 // Create a custom ICU break iterator with empty text used in this object. (We | 320 // Create a custom ICU break iterator with empty text used in this object. (We |
319 // allow setting text later so we can re-use this iterator.) | 321 // allow setting text later so we can re-use this iterator.) |
320 DCHECK(attribute); | 322 DCHECK(attribute); |
321 const base::string16 rule(attribute->GetRuleSet(allow_contraction)); | 323 const base::string16 rule(attribute->GetRuleSet(allow_contraction)); |
322 | 324 |
323 // If there is no rule set, the attributes were invalid. | 325 // If there is no rule set, the attributes were invalid. |
324 if (rule.empty()) | 326 if (rule.empty()) |
325 return false; | 327 return false; |
326 | 328 |
327 scoped_ptr<base::i18n::BreakIterator> iterator( | 329 scoped_ptr<BreakIterator> iterator(new BreakIterator(base::string16(), rule)); |
328 new base::i18n::BreakIterator(base::string16(), rule)); | |
329 if (!iterator->Init()) { | 330 if (!iterator->Init()) { |
330 // Since we're not passing in any text, the only reason this could fail | 331 // Since we're not passing in any text, the only reason this could fail |
331 // is if we fail to parse the rules. Since the rules are hardcoded, | 332 // is if we fail to parse the rules. Since the rules are hardcoded, |
332 // that would be a bug in this class. | 333 // that would be a bug in this class. |
333 NOTREACHED() << "failed to open iterator (broken rules)"; | 334 NOTREACHED() << "failed to open iterator (broken rules)"; |
334 return false; | 335 return false; |
335 } | 336 } |
336 iterator_ = iterator.Pass(); | 337 iterator_ = iterator.Pass(); |
337 | 338 |
338 // Set the character attributes so we can normalize the words extracted by | 339 // Set the character attributes so we can normalize the words extracted by |
(...skipping 13 matching lines...) Expand all Loading... |
352 // Set the text to be split by this iterator. | 353 // Set the text to be split by this iterator. |
353 if (!iterator_->SetText(text, length)) { | 354 if (!iterator_->SetText(text, length)) { |
354 LOG(ERROR) << "failed to set text"; | 355 LOG(ERROR) << "failed to set text"; |
355 return false; | 356 return false; |
356 } | 357 } |
357 | 358 |
358 text_ = text; | 359 text_ = text; |
359 return true; | 360 return true; |
360 } | 361 } |
361 | 362 |
362 bool SpellcheckWordIterator::GetNextWord(base::string16* word_string, | 363 SpellcheckWordIterator::WordIteratorStatus SpellcheckWordIterator::GetNextWord( |
363 int* word_start, | 364 base::string16* word_string, |
364 int* word_length) { | 365 int* word_start, |
| 366 int* word_length) { |
365 DCHECK(!!text_); | 367 DCHECK(!!text_); |
366 | 368 |
367 word_string->clear(); | 369 word_string->clear(); |
368 *word_start = 0; | 370 *word_start = 0; |
369 *word_length = 0; | 371 *word_length = 0; |
370 | 372 |
371 if (!text_) { | 373 if (!text_) { |
372 return false; | 374 return IS_END_OF_TEXT; |
373 } | 375 } |
374 | 376 |
375 // Find a word that can be checked for spelling. Our rule sets filter out | 377 // Find a word that can be checked for spelling or a character that can be |
376 // invalid words (e.g. numbers and characters not supported by the | 378 // skipped over. Rather than moving past a skippable character this returns |
377 // spellchecker language) so this ubrk_getRuleStatus() call returns | 379 // IS_SKIPPABLE and defers handling the character to the calling function. |
378 // UBRK_WORD_NONE when this iterator finds an invalid word. So, we skip such | |
379 // words until we can find a valid word or reach the end of the input string. | |
380 while (iterator_->Advance()) { | 380 while (iterator_->Advance()) { |
381 const size_t start = iterator_->prev(); | 381 const size_t start = iterator_->prev(); |
382 const size_t length = iterator_->pos() - start; | 382 const size_t length = iterator_->pos() - start; |
383 if (iterator_->IsWord()) { | 383 switch (iterator_->GetWordBreakStatus()) { |
384 if (Normalize(start, length, word_string)) { | 384 case BreakIterator::IS_WORD_BREAK: { |
| 385 if (Normalize(start, length, word_string)) { |
| 386 *word_start = start; |
| 387 *word_length = length; |
| 388 return IS_WORD; |
| 389 } |
| 390 break; |
| 391 } |
| 392 case BreakIterator::IS_SKIPPABLE_WORD: { |
| 393 *word_string = iterator_->GetString(); |
385 *word_start = start; | 394 *word_start = start; |
386 *word_length = length; | 395 *word_length = length; |
387 return true; | 396 return IS_SKIPPABLE; |
| 397 } |
| 398 // |iterator_| is RULE_BASED so the break status should never be |
| 399 // IS_LINE_OR_CHAR_BREAK. |
| 400 case BreakIterator::IS_LINE_OR_CHAR_BREAK: { |
| 401 NOTREACHED(); |
| 402 break; |
388 } | 403 } |
389 } | 404 } |
390 } | 405 } |
391 | 406 |
392 // There aren't any more words in the given text. | 407 // There aren't any more words in the given text. |
393 return false; | 408 return IS_END_OF_TEXT; |
394 } | 409 } |
395 | 410 |
396 void SpellcheckWordIterator::Reset() { | 411 void SpellcheckWordIterator::Reset() { |
397 iterator_.reset(); | 412 iterator_.reset(); |
398 } | 413 } |
399 | 414 |
400 bool SpellcheckWordIterator::Normalize(int input_start, | 415 bool SpellcheckWordIterator::Normalize(int input_start, |
401 int input_length, | 416 int input_length, |
402 base::string16* output_string) const { | 417 base::string16* output_string) const { |
403 // We use NFKC (Normalization Form, Compatible decomposition, followed by | 418 // We use NFKC (Normalization Form, Compatible decomposition, followed by |
(...skipping 10 matching lines...) Expand all Loading... |
414 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING) | 429 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING) |
415 return false; | 430 return false; |
416 | 431 |
417 // Copy the normalized text to the output. | 432 // Copy the normalized text to the output. |
418 icu::StringCharacterIterator it(output); | 433 icu::StringCharacterIterator it(output); |
419 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next()) | 434 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next()) |
420 attribute_->OutputChar(c, output_string); | 435 attribute_->OutputChar(c, output_string); |
421 | 436 |
422 return !output_string->empty(); | 437 return !output_string->empty(); |
423 } | 438 } |
OLD | NEW |