Chromium Code Reviews| Index: base/i18n/word_iterator.h |
| =================================================================== |
| --- base/i18n/word_iterator.h (revision 68637) |
| +++ base/i18n/word_iterator.h (working copy) |
| @@ -8,18 +8,27 @@ |
| #include <vector> |
| -#include "unicode/ubrk.h" |
| -#include "unicode/uchar.h" |
| - |
| #include "base/basictypes.h" |
| #include "base/string16.h" |
| // The WordIterator class iterates through the words and word breaks |
| -// in a string. (In the string " foo bar! ", the word breaks are at the |
| -// periods in ". .foo. .bar.!. .".) |
| +// in a UTF-16 string. |
| // |
| -// To extract the words from a string, move a WordIterator through the |
| -// string and test whether IsWord() is true. E.g., |
| +// It provides two modes, BREAK_WORD and BREAK_LINE, which modify how |
| +// trailing non-word characters are aggregated into the returned word. |
| +// |
| +// Under BREAK_WORD mode (more common), the non-word characters are |
| +// not included with a returned word (e.g. in the UTF-16 equivalent of |
| +// the string " foo bar! ", the word breaks are at the periods in |
| +// ". .foo. .bar.!. ."). |
| +// |
| +// Under BREAK_LINE mode (less common), the non-word characters are |
|
Evan Martin
2010/12/09 22:29:10
typo: character*s*
|
| +// included in the word, breaking only when a space-equivalent character |
| +// is encountered (e.g. in the UTF-16 equivalent of the string " foo bar! ", |
|
Evan Martin
2010/12/09 22:29:10
I think "UTF16-equivalent" is needless here, but y
|
| +// the word breaks are at the periods in ". .foo .bar! ."). |
|
Evan Martin
2010/12/09 22:29:10
typo: double-space
|
| +// |
| +// To extract the words from a string, move a BREAK_WORD WordIterator |
| +// through the string and test whether IsWord() is true. E.g., |
|
Evan Martin
2010/12/09 22:29:10
I hate to be pedantic but here we use "e.g." and a
|
| // WordIterator iter(&str, WordIterator::BREAK_WORD); |
| // if (!iter.Init()) return false; |
| // while (iter.Advance()) { |
| @@ -68,11 +77,11 @@ |
| string16 GetWord() const; |
| private: |
| - // ICU iterator. |
| - UBreakIterator* iter_; |
| -#if !defined(WCHAR_T_IS_UTF16) |
| - std::vector<UChar> chars_; |
| -#endif |
| + // ICU iterator, held as a void* so this header need not include ubrk.h. |
|
Evan Martin
2010/12/09 22:29:10
Without this review for context, this comment line
|
| + // This is actually an ICU UBreakIterator* type, which turns out to be |
| + // a typedef for a void* in the ICU headers. Using void* directly prevents |
| + // callers from needing access to the ICU public headers directory. |
| + void* iter_; |
| // The string we're iterating over. |
| const string16* string_; |