| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/spellcheck_worditerator.h" | 5 #include "chrome/browser/spellcheck_worditerator.h" |
| 6 | 6 |
| 7 #include <map> | 7 #include <map> |
| 8 #include <string> | 8 #include <string> |
| 9 | 9 |
| 10 #include "base/basictypes.h" | 10 #include "base/basictypes.h" |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 77 for (int i = 0; i < length; ++i) { | 77 for (int i = 0; i < length; ++i) { |
| 78 UChar32 character = uset_charAt(exemplar_set, i); | 78 UChar32 character = uset_charAt(exemplar_set, i); |
| 79 SetWordScript(GetScriptCode(character), true); | 79 SetWordScript(GetScriptCode(character), true); |
| 80 } | 80 } |
| 81 | 81 |
| 82 // Many languages use combining characters to input their characters from | 82 // Many languages use combining characters to input their characters from |
| 83 // keyboards. On the other hand, this exemplar set does not always include | 83 // keyboards. On the other hand, this exemplar set does not always include |
| 84 // combining characters for such languages. | 84 // combining characters for such languages. |
| 85 // To treat such combining characters as word characters, we decompose | 85 // To treat such combining characters as word characters, we decompose |
| 86 // this exemplar set and treat the decomposed characters as word characters. | 86 // this exemplar set and treat the decomposed characters as word characters. |
| 87 UnicodeString composed; | 87 icu::UnicodeString composed; |
| 88 for (int i = 0; i < length; ++i) | 88 for (int i = 0; i < length; ++i) |
| 89 composed.append(uset_charAt(exemplar_set, i)); | 89 composed.append(uset_charAt(exemplar_set, i)); |
| 90 | 90 |
| 91 UnicodeString decomposed; | 91 icu::UnicodeString decomposed; |
| 92 Normalizer::decompose(composed, FALSE, 0, decomposed, status); | 92 icu::Normalizer::decompose(composed, FALSE, 0, decomposed, status); |
| 93 if (U_SUCCESS(status)) { | 93 if (U_SUCCESS(status)) { |
| 94 StringCharacterIterator iterator(decomposed); | 94 icu::StringCharacterIterator iterator(decomposed); |
| 95 UChar32 character = iterator.first32(); | 95 UChar32 character = iterator.first32(); |
| 96 while (character != CharacterIterator::DONE) { | 96 while (character != icu::CharacterIterator::DONE) { |
| 97 SetWordScript(GetScriptCode(character), true); | 97 SetWordScript(GetScriptCode(character), true); |
| 98 character = iterator.next32(); | 98 character = iterator.next32(); |
| 99 } | 99 } |
| 100 } | 100 } |
| 101 } | 101 } |
| 102 uset_close(exemplar_set); | 102 uset_close(exemplar_set); |
| 103 } | 103 } |
| 104 | 104 |
| 105 // Returns whether or not the given character is a character used by the | 105 // Returns whether or not the given character is a character used by the |
| 106 // selected dictionary. | 106 // selected dictionary. |
| (...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 257 // also consists of ISO/IEC 8859-{2,3,4,9,10}, ligatures, fullwidth latin, | 257 // also consists of ISO/IEC 8859-{2,3,4,9,10}, ligatures, fullwidth latin, |
| 258 // etc. For its details, please read the script table in | 258 // etc. For its details, please read the script table in |
| 259 // "http://www.unicode.org/Public/UNIDATA/Scripts.txt". | 259 // "http://www.unicode.org/Public/UNIDATA/Scripts.txt". |
| 260 bool SpellcheckWordIterator::Normalize(int input_start, | 260 bool SpellcheckWordIterator::Normalize(int input_start, |
| 261 int input_length, | 261 int input_length, |
| 262 string16* output_string) const { | 262 string16* output_string) const { |
| 263 // Unicode Standard Annex #15 "http://www.unicode.org/unicode/reports/tr15/" | 263 // Unicode Standard Annex #15 "http://www.unicode.org/unicode/reports/tr15/" |
| 264 // does not only write NFKD and NFKC can compose ligatures into their ASCII | 264 // does not only write NFKD and NFKC can compose ligatures into their ASCII |
| 265 // alternatives, but also write NFKC keeps accents of characters. | 265 // alternatives, but also write NFKC keeps accents of characters. |
| 266 // Therefore, NFKC seems to be the best option for hunspell. | 266 // Therefore, NFKC seems to be the best option for hunspell. |
| 267 UnicodeString input(FALSE, &word_[input_start], input_length); | 267 icu::UnicodeString input(FALSE, &word_[input_start], input_length); |
| 268 UErrorCode status = U_ZERO_ERROR; | 268 UErrorCode status = U_ZERO_ERROR; |
| 269 UnicodeString output; | 269 icu::UnicodeString output; |
| 270 Normalizer::normalize(input, UNORM_NFKC, 0, output, status); | 270 icu::Normalizer::normalize(input, UNORM_NFKC, 0, output, status); |
| 271 if (U_SUCCESS(status)) | 271 if (U_SUCCESS(status)) |
| 272 output_string->assign(output.getTerminatedBuffer()); | 272 output_string->assign(output.getTerminatedBuffer()); |
| 273 return status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING; | 273 return status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING; |
| 274 } | 274 } |
| OLD | NEW |