OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/spellcheck_worditerator.h" | 5 #include "chrome/browser/spellcheck_worditerator.h" |
6 | 6 |
7 #include <map> | 7 #include <map> |
8 #include <string> | 8 #include <string> |
9 | 9 |
10 #include "base/basictypes.h" | 10 #include "base/basictypes.h" |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
77 for (int i = 0; i < length; ++i) { | 77 for (int i = 0; i < length; ++i) { |
78 UChar32 character = uset_charAt(exemplar_set, i); | 78 UChar32 character = uset_charAt(exemplar_set, i); |
79 SetWordScript(GetScriptCode(character), true); | 79 SetWordScript(GetScriptCode(character), true); |
80 } | 80 } |
81 | 81 |
82 // Many languages use combining characters to input their characters from | 82 // Many languages use combining characters to input their characters from |
83 // keyboards. On the other hand, this exemplar set does not always include | 83 // keyboards. On the other hand, this exemplar set does not always include |
84 // combining characters for such languages. | 84 // combining characters for such languages. |
85 // To treat such combining characters as word characters, we decompose | 85 // To treat such combining characters as word characters, we decompose |
86 // this exemplar set and treat the decomposed characters as word characters. | 86 // this exemplar set and treat the decomposed characters as word characters. |
87 UnicodeString composed; | 87 icu::UnicodeString composed; |
88 for (int i = 0; i < length; ++i) | 88 for (int i = 0; i < length; ++i) |
89 composed.append(uset_charAt(exemplar_set, i)); | 89 composed.append(uset_charAt(exemplar_set, i)); |
90 | 90 |
91 UnicodeString decomposed; | 91 icu::UnicodeString decomposed; |
92 Normalizer::decompose(composed, FALSE, 0, decomposed, status); | 92 icu::Normalizer::decompose(composed, FALSE, 0, decomposed, status); |
93 if (U_SUCCESS(status)) { | 93 if (U_SUCCESS(status)) { |
94 StringCharacterIterator iterator(decomposed); | 94 icu::StringCharacterIterator iterator(decomposed); |
95 UChar32 character = iterator.first32(); | 95 UChar32 character = iterator.first32(); |
96 while (character != CharacterIterator::DONE) { | 96 while (character != icu::CharacterIterator::DONE) { |
97 SetWordScript(GetScriptCode(character), true); | 97 SetWordScript(GetScriptCode(character), true); |
98 character = iterator.next32(); | 98 character = iterator.next32(); |
99 } | 99 } |
100 } | 100 } |
101 } | 101 } |
102 uset_close(exemplar_set); | 102 uset_close(exemplar_set); |
103 } | 103 } |
104 | 104 |
105 // Returns whether or not the given character is a character used by the | 105 // Returns whether or not the given character is a character used by the |
106 // selected dictionary. | 106 // selected dictionary. |
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
257 // also consists of ISO/IEC 8859-{2,3,4,9,10}, ligatures, fullwidth latin, | 257 // also consists of ISO/IEC 8859-{2,3,4,9,10}, ligatures, fullwidth latin, |
258 // etc. For its details, please read the script table in | 258 // etc. For its details, please read the script table in |
259 // "http://www.unicode.org/Public/UNIDATA/Scripts.txt". | 259 // "http://www.unicode.org/Public/UNIDATA/Scripts.txt". |
260 bool SpellcheckWordIterator::Normalize(int input_start, | 260 bool SpellcheckWordIterator::Normalize(int input_start, |
261 int input_length, | 261 int input_length, |
262 string16* output_string) const { | 262 string16* output_string) const {Ā|
263 // Unicode Standard Annex #15 "http://www.unicode.org/unicode/reports/tr15/" | 263 // Unicode Standard Annex #15 "http://www.unicode.org/unicode/reports/tr15/" |
264 // says not only that NFKD and NFKC can convert ligatures into their ASCII | 264 // says not only that NFKD and NFKC can convert ligatures into their ASCII |
265 // alternatives, but also that NFKC keeps the accents of characters. | 265 // alternatives, but also that NFKC keeps the accents of characters. |
266 // Therefore, NFKC seems to be the best option for hunspell. | 266 // Therefore, NFKC seems to be the best option for hunspell. |
// The input is the |input_length| code units of word_ starting at |input_start|.
// NOTE(review): this looks like ICU's read-only aliasing constructor (FALSE =
// buffer not NUL-terminated, no copy of word_) - confirm against the ICU docs.
267 UnicodeString input(FALSE, &word_[input_start], input_length); | 267 icu::UnicodeString input(FALSE, &word_[input_start], input_length); |
268 UErrorCode status = U_ZERO_ERROR; | 268 UErrorCode status = U_ZERO_ERROR; |
269 UnicodeString output; | 269 icu::UnicodeString output; |
270 Normalizer::normalize(input, UNORM_NFKC, 0, output, status); | 270 icu::Normalizer::normalize(input, UNORM_NFKC, 0, output, status); |
// |output_string| is overwritten only when ICU reports success; otherwise it
// is left untouched.
271 if (U_SUCCESS(status)) | 271 if (U_SUCCESS(status)) |
272 output_string->assign(output.getTerminatedBuffer()); | 272 output_string->assign(output.getTerminatedBuffer()); |
// NOTE(review): the assignment above is guarded by U_SUCCESS(), while the
// return value accepts only the two statuses below. If U_SUCCESS() also admits
// other warning codes, |output_string| can be modified while false is
// returned - confirm that this is intended.
273 return status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING; | 273 return status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING; |
274 } | 274 } |
OLD | NEW |