Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(921)

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_worditerator.cc

Issue 1269343005: Updates SpellcheckWordIterator::GetNextWord to return an enum. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@break-iter
Patch Set: Addressed comments. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Implements a custom word iterator used for our spellchecker. 5 // Implements a custom word iterator used for our spellchecker.
6 6
7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" 7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
8 8
9 #include <map> 9 #include <map>
10 #include <string> 10 #include <string>
11 11
12 #include "base/basictypes.h" 12 #include "base/basictypes.h"
13 #include "base/i18n/break_iterator.h" 13 #include "base/i18n/break_iterator.h"
14 #include "base/logging.h" 14 #include "base/logging.h"
15 #include "base/strings/stringprintf.h" 15 #include "base/strings/stringprintf.h"
16 #include "base/strings/utf_string_conversions.h" 16 #include "base/strings/utf_string_conversions.h"
17 #include "chrome/renderer/spellchecker/spellcheck.h" 17 #include "chrome/renderer/spellchecker/spellcheck.h"
18 #include "third_party/icu/source/common/unicode/normlzr.h" 18 #include "third_party/icu/source/common/unicode/normlzr.h"
19 #include "third_party/icu/source/common/unicode/schriter.h" 19 #include "third_party/icu/source/common/unicode/schriter.h"
20 #include "third_party/icu/source/common/unicode/uscript.h" 20 #include "third_party/icu/source/common/unicode/uscript.h"
21 #include "third_party/icu/source/i18n/unicode/ulocdata.h" 21 #include "third_party/icu/source/i18n/unicode/ulocdata.h"
22 22
23 using base::i18n::BreakIterator;
24
23 // SpellcheckCharAttribute implementation: 25 // SpellcheckCharAttribute implementation:
24 26
25 SpellcheckCharAttribute::SpellcheckCharAttribute() 27 SpellcheckCharAttribute::SpellcheckCharAttribute()
26 : script_code_(USCRIPT_LATIN) { 28 : script_code_(USCRIPT_LATIN) {
27 } 29 }
28 30
29 SpellcheckCharAttribute::~SpellcheckCharAttribute() { 31 SpellcheckCharAttribute::~SpellcheckCharAttribute() {
30 } 32 }
31 33
32 void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) { 34 void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) {
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after
317 bool allow_contraction) { 319 bool allow_contraction) {
318 // Create a custom ICU break iterator with empty text used in this object. (We 320 // Create a custom ICU break iterator with empty text used in this object. (We
319 // allow setting text later so we can re-use this iterator.) 321 // allow setting text later so we can re-use this iterator.)
320 DCHECK(attribute); 322 DCHECK(attribute);
321 const base::string16 rule(attribute->GetRuleSet(allow_contraction)); 323 const base::string16 rule(attribute->GetRuleSet(allow_contraction));
322 324
323 // If there is no rule set, the attributes were invalid. 325 // If there is no rule set, the attributes were invalid.
324 if (rule.empty()) 326 if (rule.empty())
325 return false; 327 return false;
326 328
327 scoped_ptr<base::i18n::BreakIterator> iterator( 329 scoped_ptr<BreakIterator> iterator(new BreakIterator(base::string16(), rule));
328 new base::i18n::BreakIterator(base::string16(), rule));
329 if (!iterator->Init()) { 330 if (!iterator->Init()) {
330 // Since we're not passing in any text, the only reason this could fail 331 // Since we're not passing in any text, the only reason this could fail
331 // is if we fail to parse the rules. Since the rules are hardcoded, 332 // is if we fail to parse the rules. Since the rules are hardcoded,
332 // that would be a bug in this class. 333 // that would be a bug in this class.
333 NOTREACHED() << "failed to open iterator (broken rules)"; 334 NOTREACHED() << "failed to open iterator (broken rules)";
334 return false; 335 return false;
335 } 336 }
336 iterator_ = iterator.Pass(); 337 iterator_ = iterator.Pass();
337 338
338 // Set the character attributes so we can normalize the words extracted by 339 // Set the character attributes so we can normalize the words extracted by
(...skipping 13 matching lines...) Expand all
352 // Set the text to be split by this iterator. 353 // Set the text to be split by this iterator.
353 if (!iterator_->SetText(text, length)) { 354 if (!iterator_->SetText(text, length)) {
354 LOG(ERROR) << "failed to set text"; 355 LOG(ERROR) << "failed to set text";
355 return false; 356 return false;
356 } 357 }
357 358
358 text_ = text; 359 text_ = text;
359 return true; 360 return true;
360 } 361 }
361 362
362 bool SpellcheckWordIterator::GetNextWord(base::string16* word_string, 363 SpellcheckWordIterator::WordIteratorStatus SpellcheckWordIterator::GetNextWord(
363 int* word_start, 364 base::string16* word_string,
364 int* word_length) { 365 int* word_start,
366 int* word_length) {
365 DCHECK(!!text_); 367 DCHECK(!!text_);
366 368
367 word_string->clear(); 369 word_string->clear();
368 *word_start = 0; 370 *word_start = 0;
369 *word_length = 0; 371 *word_length = 0;
370 372
371 if (!text_) { 373 if (!text_) {
372 return false; 374 return IS_END_OF_TEXT;
373 } 375 }
374 376
375 // Find a word that can be checked for spelling. Our rule sets filter out 377 // Find a word that can be checked for spelling or a character that can be
376 // invalid words (e.g. numbers and characters not supported by the 378 // skipped over. Rather than moving past a skippable character this returns
377 // spellchecker language) so this ubrk_getRuleStatus() call returns 379 // IS_SKIPPABLE and defers handling the character to the calling function.
378 // UBRK_WORD_NONE when this iterator finds an invalid word. So, we skip such
379 // words until we can find a valid word or reach the end of the input string.
380 while (iterator_->Advance()) { 380 while (iterator_->Advance()) {
381 const size_t start = iterator_->prev(); 381 const size_t start = iterator_->prev();
382 const size_t length = iterator_->pos() - start; 382 const size_t length = iterator_->pos() - start;
383 if (iterator_->IsWord()) { 383 switch (iterator_->GetWordBreakStatus()) {
384 if (Normalize(start, length, word_string)) { 384 case BreakIterator::IS_WORD_BREAK: {
385 if (Normalize(start, length, word_string)) {
386 *word_start = start;
387 *word_length = length;
388 return IS_WORD;
389 }
390 break;
391 }
392 case BreakIterator::IS_SKIPPABLE_WORD: {
393 *word_string = iterator_->GetString();
385 *word_start = start; 394 *word_start = start;
386 *word_length = length; 395 *word_length = length;
387 return true; 396 return IS_SKIPPABLE;
397 }
398 // |iterator_| is RULE_BASED so the break status should never be
399 // IS_LINE_OR_CHAR_BREAK.
400 case BreakIterator::IS_LINE_OR_CHAR_BREAK: {
401 NOTREACHED();
402 break;
388 } 403 }
389 } 404 }
390 } 405 }
391 406
392 // There aren't any more words in the given text. 407 // There aren't any more words in the given text.
393 return false; 408 return IS_END_OF_TEXT;
394 } 409 }
395 410
396 void SpellcheckWordIterator::Reset() { 411 void SpellcheckWordIterator::Reset() {
397 iterator_.reset(); 412 iterator_.reset();
398 } 413 }
399 414
400 bool SpellcheckWordIterator::Normalize(int input_start, 415 bool SpellcheckWordIterator::Normalize(int input_start,
401 int input_length, 416 int input_length,
402 base::string16* output_string) const { 417 base::string16* output_string) const {
403 // We use NFKC (Normalization Form, Compatible decomposition, followed by 418 // We use NFKC (Normalization Form, Compatible decomposition, followed by
(...skipping 10 matching lines...) Expand all
414 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING) 429 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING)
415 return false; 430 return false;
416 431
417 // Copy the normalized text to the output. 432 // Copy the normalized text to the output.
418 icu::StringCharacterIterator it(output); 433 icu::StringCharacterIterator it(output);
419 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next()) 434 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next())
420 attribute_->OutputChar(c, output_string); 435 attribute_->OutputChar(c, output_string);
421 436
422 return !output_string->empty(); 437 return !output_string->empty();
423 } 438 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698