Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Defines an iterator class that enumerates words supported by our spellchecker | 5 // Defines an iterator class that enumerates words supported by our spellchecker |
| 6 // from multi-language text. This class is used for filtering out characters | 6 // from multi-language text. This class is used for filtering out characters |
| 7 // not supported by our spellchecker. | 7 // not supported by our spellchecker. |
| 8 | 8 |
| 9 #ifndef CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_ | 9 #ifndef CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_ |
| 10 #define CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_ | 10 #define CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_ |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 105 // | 105 // |
| 106 // base::string16 word; | 106 // base::string16 word; |
| 107 // int offset; | 107 // int offset; |
| 108 // int length; | 108 // int length; |
| 109 // while (iterator.GetNextWord(&word, &offset, &length)) { | 109 // while (iterator.GetNextWord(&word, &offset, &length)) { |
| 110 // ... | 110 // ... |
| 111 // } | 111 // } |
| 112 // | 112 // |
| 113 class SpellcheckWordIterator { | 113 class SpellcheckWordIterator { |
| 114 public: | 114 public: |
| 115 enum WordIteratorStatus { | |
| 116 // The end of a sequence of text that the iterator recognizes as characters | |
| 117 // that can form a word. | |
| 118 IS_WORD, | |
| 119 // Non-word characters that the iterator can skip past, such as punctuation, | |
| 120 // whitespace, and characters from another character set. | |
| 121 IS_SKIPPABLE_CHAR, | |
|
please use gerrit instead
2015/08/11 22:54:10
IS_SKIPPABLE
Putting "_CHAR" in there might confu
Julius
2015/08/12 20:25:59
Done.
| |
| 122 // The end of the text that the iterator is going over. | |
| 123 IS_END_OF_TEXT | |
| 124 }; | |
| 125 | |
| 115 SpellcheckWordIterator(); | 126 SpellcheckWordIterator(); |
| 116 ~SpellcheckWordIterator(); | 127 ~SpellcheckWordIterator(); |
| 117 | 128 |
| 118 // Initializes a word-iterator object with the language-specific attribute. If | 129 // Initializes a word-iterator object with the language-specific attribute. If |
| 119 // we need to split contractions and concatenated words, call this function | 130 // we need to split contractions and concatenated words, call this function |
| 120 // with its 'allow_contraction' parameter false. (This function uses lots of | 131 // with its 'allow_contraction' parameter false. (This function uses lots of |
| 121 // temporary memory to compile a custom word-break rule into an automaton.) | 132 // temporary memory to compile a custom word-break rule into an automaton.) |
| 122 bool Initialize(const SpellcheckCharAttribute* attribute, | 133 bool Initialize(const SpellcheckCharAttribute* attribute, |
| 123 bool allow_contraction); | 134 bool allow_contraction); |
| 124 | 135 |
| 125 // Returns whether this word iterator is initialized. | 136 // Returns whether this word iterator is initialized. |
| 126 bool IsInitialized() const; | 137 bool IsInitialized() const; |
| 127 | 138 |
| 128 // Set text to be iterated. (This text does not have to be NULL-terminated.) | 139 // Set text to be iterated. (This text does not have to be NULL-terminated.) |
| 129 // This function also resets internal state so we can reuse this iterator | 140 // This function also resets internal state so we can reuse this iterator |
| 130 // without calling Initialize(). | 141 // without calling Initialize(). |
| 131 bool SetText(const base::char16* text, size_t length); | 142 bool SetText(const base::char16* text, size_t length); |
| 132 | 143 |
| 133 // Retrieves a word (or a contraction), stores its copy to 'word_string', and | 144 // Advances |iterator_| through |text_| and gets the current status of the |
| 134 // stores the position and the length for input word to 'word_start'. Since | 145 // word iterator within |text_|: |
| 135 // this function normalizes the output word, the length of 'word_string' may | 146 // - Returns IS_WORD if the iterator just found the end of a sequence of word |
|
please use gerrit instead
2015/08/11 22:54:10
Put a newline before a bullet point to make the co
Julius
2015/08/12 20:25:59
Done.
| |
| 136 // be different from the 'word_length'. Therefore, when we call functions that | 147 // characters and it was able to normalize the sequence. This stores the |
| 137 // change the input text, such as string16::replace(), we need to use | 148 // normalized string into |word_string| and stores the position and length |
| 138 // 'word_start' and 'word_length' as listed in the following snippet. | 149 // into |word_start| and |word_length| respectively. Keep in mind that |
| 139 // | 150 // since this function normalizes the output word, the length of |
| 140 // while(iterator.GetNextWord(&word, &offset, &length)) | 151 // |word_string| may be different from the |word_length|. |
|
please use gerrit instead
2015/08/11 22:54:10
Keep this example in the comment please.
Julius
2015/08/12 20:25:59
Done.
| |
| 141 // text.replace(offset, length, word); | 152 // - Returns IS_SKIPPABLE_CHAR if the iterator just found a character that |
| 142 // | 153 // the iterator can skip past such as punctuation, whitespace, and |
| 143 bool GetNextWord(base::string16* word_string, | 154 // characters from another character set. |
| 144 int* word_start, | 155 // - Returns IS_END_OF_TEXT if the iterator has reached the end of |text_|. |
| 145 int* word_length); | 156 SpellcheckWordIterator::WordIteratorStatus |
| 157 GetNextWord(base::string16* word_string, int* word_start, int* word_length); | |
| 146 | 158 |
| 147 // Releases all the resources attached to this object. | 159 // Releases all the resources attached to this object. |
| 148 void Reset(); | 160 void Reset(); |
| 149 | 161 |
| 150 private: | 162 private: |
| 151 // Normalizes a non-terminated string returned from an ICU word-break | 163 // Normalizes a non-terminated string returned from an ICU word-break |
| 152 // iterator. A word returned from an ICU break iterator may include characters | 164 // iterator. A word returned from an ICU break iterator may include characters |
| 153 // not supported by our spellchecker, e.g. ligatures, combining characters, | 165 // not supported by our spellchecker, e.g. ligatures, combining characters, |
| 154 // full-width letters, etc. This function replaces such characters with | 166 // full-width letters, etc. This function replaces such characters with |
| 155 // alternative characters supported by our spellchecker. This function also | 167 // alternative characters supported by our spellchecker. This function also |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 167 const SpellcheckCharAttribute* attribute_; | 179 const SpellcheckCharAttribute* attribute_; |
| 168 | 180 |
| 169 // The break iterator. | 181 // The break iterator. |
| 170 scoped_ptr<base::i18n::BreakIterator> iterator_; | 182 scoped_ptr<base::i18n::BreakIterator> iterator_; |
| 171 | 183 |
| 172 DISALLOW_COPY_AND_ASSIGN(SpellcheckWordIterator); | 184 DISALLOW_COPY_AND_ASSIGN(SpellcheckWordIterator); |
| 173 }; | 185 }; |
| 174 | 186 |
| 175 #endif // CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_ | 187 #endif // CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_ |
| 176 | 188 |
| OLD | NEW |