chrome/browser/spellcheck_worditerator.cc - Issue 21079: Fix for Issue 6431 "Two issues about Vietnamese Spell-Checker"....

Unified Diff: chrome/browser/spellcheck_worditerator.cc

Issue 21079: Fix for Issue 6431 "Two issues about Vietnamese Spell-Checker".... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/browser/spellcheck_worditerator.cc

===================================================================

--- chrome/browser/spellcheck_worditerator.cc (revision 10070)

+++ chrome/browser/spellcheck_worditerator.cc (working copy)

@@ -10,8 +10,9 @@

#include "base/basictypes.h"

#include "base/string_util.h"

+#include "third_party/icu38/public/common/unicode/normlzr.h"

+#include "third_party/icu38/public/common/unicode/schriter.h"

#include "third_party/icu38/public/common/unicode/uchar.h"

-#include "third_party/icu38/public/common/unicode/unorm.h"

#include "third_party/icu38/public/common/unicode/uscript.h"

#include "third_party/icu38/public/common/unicode/uset.h"

#include "third_party/icu38/public/i18n/unicode/ulocdata.h"

@@ -80,6 +81,26 @@

UChar32 character = uset_charAt(exemplar_set, i);

SetWordScript(GetScriptCode(character), true);

}

+ // Many languages use combining characters to input their characters from

+ // keyboards, but this exemplar set does not always include the combining

+ // characters for such languages.

+ // To treat them as word characters, we decompose this exemplar set and

+ // mark the script of each decomposed character as a word script.

+ UnicodeString composed;

+ for (int i = 0; i < length; ++i)

+ composed.append(uset_charAt(exemplar_set, i));

+ UnicodeString decomposed;

+ Normalizer::decompose(composed, FALSE, 0, decomposed, status);

+ if (U_SUCCESS(status)) {

+ StringCharacterIterator iterator(decomposed);

+ UChar32 character = iterator.first32();

+ while (character != CharacterIterator::DONE) {

+ SetWordScript(GetScriptCode(character), true);

+ character = iterator.next32();

+ }

}

uset_close(exemplar_set);

}

@@ -246,25 +267,12 @@

// does not only write NFKD and NFKC can compose ligatures into their ASCII

// alternatives, but also write NFKC keeps accents of characters.

// Therefore, NFKC seems to be the best option for hunspell.

- // To use NKFC for normalization, the length of the output string is mostly

- // equal to the one of the input string. (One exception is ligatures.)

- // To avoid the unorm_normalize() function from being called always twice,

- // we temporarily allocate |input_length| + 1 characters to the output string

- // and call the function with it. We re-allocate the output string

- // only if it cannot store the normalized string, i.e. the output string is

- // longer than the input one.

- const char16* input_string = &word_[input_start];

- UErrorCode error_code = U_ZERO_ERROR;

- int output_length = input_length + 1;

- char16* output_buffer = WriteInto(output_string, output_length);

- output_length = unorm_normalize(input_string, input_length, UNORM_NFKC, 0,

- output_buffer, output_length, &error_code);

- if (error_code == U_BUFFER_OVERFLOW_ERROR) {

- error_code = U_ZERO_ERROR;

- output_buffer = WriteInto(output_string, ++output_length);

- output_length = unorm_normalize(input_string, input_length, UNORM_NFKC, 0,

- output_buffer, output_length, &error_code);

- }

- return (error_code == U_ZERO_ERROR);

+ UnicodeString input(FALSE, &word_[input_start], input_length);

+ UErrorCode status = U_ZERO_ERROR;

+ UnicodeString output;

+ Normalizer::normalize(input, UNORM_NFKC, 0, output, status);

+ if (U_SUCCESS(status))

+ output_string->assign(output.getTerminatedBuffer());

+ return (status == U_ZERO_ERROR);

}

« no previous file with comments | « no previous file | no next file » | no next file with comments »