chrome/renderer/spellchecker/spellcheck_worditerator.cc - Issue 1269343005: Updates SpellcheckWordIterator::GetNextWord to return an enum.

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_worditerator.cc

Issue 1269343005: Updates SpellcheckWordIterator::GetNextWord to return an enum. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@break-iter

Patch Set: Addressed comments. Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « chrome/renderer/spellchecker/spellcheck_worditerator.h ('k') | chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc » ('j') | chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Implements a custom word iterator used for our spellchecker.	5 // Implements a custom word iterator used for our spellchecker.

6	6

7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"	7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"

8	8

9 #include <map>	9 #include <map>

10 #include <string>	10 #include <string>

11	11

12 #include "base/basictypes.h"	12 #include "base/basictypes.h"

13 #include "base/i18n/break_iterator.h"	13 #include "base/i18n/break_iterator.h"

14 #include "base/logging.h"	14 #include "base/logging.h"

15 #include "base/strings/stringprintf.h"	15 #include "base/strings/stringprintf.h"

16 #include "base/strings/utf_string_conversions.h"	16 #include "base/strings/utf_string_conversions.h"

17 #include "chrome/renderer/spellchecker/spellcheck.h"	17 #include "chrome/renderer/spellchecker/spellcheck.h"

18 #include "third_party/icu/source/common/unicode/normlzr.h"	18 #include "third_party/icu/source/common/unicode/normlzr.h"

19 #include "third_party/icu/source/common/unicode/schriter.h"	19 #include "third_party/icu/source/common/unicode/schriter.h"

20 #include "third_party/icu/source/common/unicode/uscript.h"	20 #include "third_party/icu/source/common/unicode/uscript.h"

21 #include "third_party/icu/source/i18n/unicode/ulocdata.h"	21 #include "third_party/icu/source/i18n/unicode/ulocdata.h"

22	22

	23 using base::i18n::BreakIterator;

	24

23 // SpellcheckCharAttribute implementation:	25 // SpellcheckCharAttribute implementation:

24	26

25 SpellcheckCharAttribute::SpellcheckCharAttribute()	27 SpellcheckCharAttribute::SpellcheckCharAttribute()

26 : script_code_(USCRIPT_LATIN) {	28 : script_code_(USCRIPT_LATIN) {

27 }	29 }

28	30

29 SpellcheckCharAttribute::~SpellcheckCharAttribute() {	31 SpellcheckCharAttribute::~SpellcheckCharAttribute() {

30 }	32 }

31	33

32 void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) {	34 void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) {

(...skipping 284 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
317 bool allow_contraction) {	319 bool allow_contraction) {

318 // Create a custom ICU break iterator with empty text used in this object. (We	320 // Create a custom ICU break iterator with empty text used in this object. (We

319 // allow setting text later so we can re-use this iterator.)	321 // allow setting text later so we can re-use this iterator.)

320 DCHECK(attribute);	322 DCHECK(attribute);

321 const base::string16 rule(attribute->GetRuleSet(allow_contraction));	323 const base::string16 rule(attribute->GetRuleSet(allow_contraction));

322	324

323 // If there is no rule set, the attributes were invalid.	325 // If there is no rule set, the attributes were invalid.

324 if (rule.empty())	326 if (rule.empty())

325 return false;	327 return false;

326	328

327 scoped_ptr<base::i18n::BreakIterator> iterator(	329 scoped_ptr<BreakIterator> iterator(new BreakIterator(base::string16(), rule));

328 new base::i18n::BreakIterator(base::string16(), rule));

329 if (!iterator->Init()) {	330 if (!iterator->Init()) {

330 // Since we're not passing in any text, the only reason this could fail	331 // Since we're not passing in any text, the only reason this could fail

331 // is if we fail to parse the rules. Since the rules are hardcoded,	332 // is if we fail to parse the rules. Since the rules are hardcoded,

332 // that would be a bug in this class.	333 // that would be a bug in this class.

333 NOTREACHED() << "failed to open iterator (broken rules)";	334 NOTREACHED() << "failed to open iterator (broken rules)";

334 return false;	335 return false;

335 }	336 }

336 iterator_ = iterator.Pass();	337 iterator_ = iterator.Pass();

337	338

338 // Set the character attributes so we can normalize the words extracted by	339 // Set the character attributes so we can normalize the words extracted by

(...skipping 13 matching lines...) Expand all Loading...
352 // Set the text to be split by this iterator.	353 // Set the text to be split by this iterator.

353 if (!iterator_->SetText(text, length)) {	354 if (!iterator_->SetText(text, length)) {

354 LOG(ERROR) << "failed to set text";	355 LOG(ERROR) << "failed to set text";

355 return false;	356 return false;

356 }	357 }

357	358

358 text_ = text;	359 text_ = text;

359 return true;	360 return true;

360 }	361 }

361	362

362 bool SpellcheckWordIterator::GetNextWord(base::string16* word_string,	363 SpellcheckWordIterator::WordIteratorStatus SpellcheckWordIterator::GetNextWord(

363 int* word_start,	364 base::string16* word_string,

364 int* word_length) {	365 int* word_start,

	366 int* word_length) {

365 DCHECK(!!text_);	367 DCHECK(!!text_);

366	368

367 word_string->clear();	369 word_string->clear();

368 *word_start = 0;	370 *word_start = 0;

369 *word_length = 0;	371 *word_length = 0;

370	372

371 if (!text_) {	373 if (!text_) {

372 return false;	374 return IS_END_OF_TEXT;

373 }	375 }

374	376

375 // Find a word that can be checked for spelling. Our rule sets filter out	377 // Find a word that can be checked for spelling or a character that can be

376 // invalid words (e.g. numbers and characters not supported by the	378 // skipped over. Rather than moving past a skippable character this returns

377 // spellchecker language) so this ubrk_getRuleStatus() call returns	379 // IS_SKIPPABLE and defers handling the character to the calling function.

378 // UBRK_WORD_NONE when this iterator finds an invalid word. So, we skip such

379 // words until we can find a valid word or reach the end of the input string.

380 while (iterator_->Advance()) {	380 while (iterator_->Advance()) {

381 const size_t start = iterator_->prev();	381 const size_t start = iterator_->prev();

382 const size_t length = iterator_->pos() - start;	382 const size_t length = iterator_->pos() - start;

383 if (iterator_->IsWord()) {	383 BreakIterator::WordBreakStatus break_status =
	please use gerrit instead 2015/08/13 00:13:28 inline this variable. Julius 2015/08/13 01:32:03 On 2015/08/13 00:13:28, Rouslan wrote: > inline this variable. Done.
384 if (Normalize(start, length, word_string)) {	384 iterator_->GetWordBreakStatus();

	385 switch (break_status) {

	386 case BreakIterator::IS_WORD_BREAK: {

	387 if (Normalize(start, length, word_string)) {

	388 *word_start = start;

	389 *word_length = length;

	390 return IS_WORD;

	391 }

	392 break;

	393 }

	394 case BreakIterator::IS_SKIPPABLE_WORD: {

	395 *word_string = iterator_->GetString();

385 *word_start = start;	396 *word_start = start;

386 *word_length = length;	397 *word_length = length;

387 return true;	398 return IS_SKIPPABLE;

	399 }

	400 // \|iterator_\| is RULE_BASED so \|break_status\| should never be
	please use gerrit instead 2015/08/13 00:13:28 If you inline \|break_status\|, then update the comment to say simply "break status". Julius 2015/08/13 01:32:03 On 2015/08/13 00:13:28, Rouslan wrote: > If you inline \|break_status\|, then update the comment to say simply "break status". Done.
	401 // IS_LINE_OR_CHAR_BREAK.

	402 case BreakIterator::IS_LINE_OR_CHAR_BREAK: {

	403 NOTREACHED();

	404 break;

388 }	405 }

389 }	406 }

390 }	407 }

391	408

392 // There aren't any more words in the given text.	409 // There aren't any more words in the given text.

393 return false;	410 return IS_END_OF_TEXT;

394 }	411 }

395	412

396 void SpellcheckWordIterator::Reset() {	413 void SpellcheckWordIterator::Reset() {

397 iterator_.reset();	414 iterator_.reset();

398 }	415 }

399	416

400 bool SpellcheckWordIterator::Normalize(int input_start,	417 bool SpellcheckWordIterator::Normalize(int input_start,

401 int input_length,	418 int input_length,

402 base::string16* output_string) const {	419 base::string16* output_string) const {

403 // We use NFKC (Normalization Form, Compatible decomposition, followed by	420 // We use NFKC (Normalization Form, Compatible decomposition, followed by

(...skipping 10 matching lines...) Expand all Loading...
414 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING)	431 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING)

415 return false;	432 return false;

416	433

417 // Copy the normalized text to the output.	434 // Copy the normalized text to the output.

418 icu::StringCharacterIterator it(output);	435 icu::StringCharacterIterator it(output);

419 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next())	436 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next())

420 attribute_->OutputChar(c, output_string);	437 attribute_->OutputChar(c, output_string);

421	438

422 return !output_string->empty();	439 return !output_string->empty();

423 }	440 }

OLD	NEW