Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(713)

Side by Side Diff: base/i18n/break_iterator.h

Issue 6713119: Make 'dangerous download warning' visible in Japanese UI on Windows. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | base/i18n/break_iterator.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef BASE_I18N_BREAK_ITERATOR_H_ 5 #ifndef BASE_I18N_BREAK_ITERATOR_H_
6 #define BASE_I18N_BREAK_ITERATOR_H_ 6 #define BASE_I18N_BREAK_ITERATOR_H_
7 #pragma once 7 #pragma once
8 8
9 #include "base/basictypes.h" 9 #include "base/basictypes.h"
10 #include "base/string16.h" 10 #include "base/string16.h"
11 11
12 // The BreakIterator class iterates through the words, word breaks, and 12 // The BreakIterator class iterates through the words, word breaks, and
13 // line breaks in a UTF-16 string. 13 // line breaks in a UTF-16 string.
14 // 14 //
15 // It provides several modes, BREAK_WORD, BREAK_SPACE, and BREAK_NEWLINE, 15 // It provides several modes, BREAK_WORD, BREAK_LINE, and BREAK_NEWLINE,
16 // which modify how characters are aggregated into the returned string. 16 // which modify how characters are aggregated into the returned string.
17 // 17 //
18 // Under BREAK_WORD mode, once a word is encountered any non-word 18 // Under BREAK_WORD mode, once a word is encountered any non-word
19 // characters are not included in the returned string (e.g. in the 19 // characters are not included in the returned string (e.g. in the
20 // UTF-16 equivalent of the string " foo bar! ", the word breaks are at 20 // UTF-16 equivalent of the string " foo bar! ", the word breaks are at
21 // the periods in ". .foo. .bar.!. ."). 21 // the periods in ". .foo. .bar.!. .").
22 // Note that Chinese/Japanese/Thai do not use spaces between words so that
23 // boundaries can fall in the middle of a continuous run of non-space /
24 // non-punctuation characters.
22 // 25 //
23 // Under BREAK_SPACE mode, once a word is encountered, any non-word 26 // Under BREAK_LINE mode, once a line breaking opportunity is encountered,
24 // characters are included in the returned string, breaking only when a 27 // any non-word characters are included in the returned string, breaking
25 // space-equivalent character is encountered (e.g. in the 28 // only when a space-equivalent character or a line breaking opportunity
26 // UTF16-equivalent of the string " foo bar! ", the word breaks are at 29 // is encountered (e.g. in the UTF16-equivalent of the string " foo bar! ",
27 // the periods in ". .foo .bar! ."). 30 // the breaks are at the periods in ". .foo .bar! .").
31 //
32 // Note that lines can be broken at any character/syllable/grapheme cluster
33 // boundary in Chinese/Japanese/Korean and at word boundaries in Thai
34 // (Thai does not use spaces between words). Therefore, this is NOT the same
35 // as breaking only at space-equivalent characters, which is what its
36 // former name (BREAK_SPACE) implied.
28 // 37 //
29 // Under BREAK_NEWLINE mode, all characters are included in the returned 38 // Under BREAK_NEWLINE mode, all characters are included in the returned
30 // string, breaking only when a newline-equivalent character is encountered 39 // string, breaking only when a newline-equivalent character is encountered
31 // (e.g. in the UTF-16 equivalent of the string "foo\nbar!\n\n", the line 40 // (e.g. in the UTF-16 equivalent of the string "foo\nbar!\n\n", the line
32 // breaks are at the periods in ".foo\n.bar!\n.\n."). 41 // breaks are at the periods in ".foo\n.bar!\n.\n.").
33 // 42 //
34 // To extract the words from a string, move a BREAK_WORD BreakIterator 43 // To extract the words from a string, move a BREAK_WORD BreakIterator
35 // through the string and test whether IsWord() is true. E.g., 44 // through the string and test whether IsWord() is true. E.g.,
36 // BreakIterator iter(&str, BreakIterator::BREAK_WORD); 45 // BreakIterator iter(&str, BreakIterator::BREAK_WORD);
37 // if (!iter.Init()) return false; 46 // if (!iter.Init()) return false;
38 // while (iter.Advance()) { 47 // while (iter.Advance()) {
39 // if (iter.IsWord()) { 48 // if (iter.IsWord()) {
40 // // region [iter.prev(),iter.pos()) contains a word. 49 // // region [iter.prev(),iter.pos()) contains a word.
41 // VLOG(1) << "word: " << iter.GetString(); 50 // VLOG(1) << "word: " << iter.GetString();
42 // } 51 // }
43 // } 52 // }
44 53
45 namespace base { 54 namespace base {
46 55
47 class BreakIterator { 56 class BreakIterator {
48 public: 57 public:
49 enum BreakType { 58 enum BreakType {
50 BREAK_WORD, 59 BREAK_WORD,
51 BREAK_SPACE, 60 BREAK_LINE,
61 // TODO(jshin): Remove this after reviewing call sites.
62 // If call sites really need break only on space-like characters
63 // implement it separately.
64 BREAK_SPACE = BREAK_LINE,
52 BREAK_NEWLINE, 65 BREAK_NEWLINE,
53 }; 66 };
54 67
55 // Requires |str| to live as long as the BreakIterator does. 68 // Requires |str| to live as long as the BreakIterator does.
56 BreakIterator(const string16* str, BreakType break_type); 69 BreakIterator(const string16* str, BreakType break_type);
57 ~BreakIterator(); 70 ~BreakIterator();
58 71
59 // Init() must be called before any of the iterators are valid. 72 // Init() must be called before any of the iterators are valid.
60 // Returns false if ICU failed to initialize. 73 // Returns false if ICU failed to initialize.
61 bool Init(); 74 bool Init();
62 75
63 // Return the current break position within the string, 76 // Return the current break position within the string,
64 // or BreakIterator::npos when done. 77 // or BreakIterator::npos when done.
65 size_t pos() const { return pos_; } 78 size_t pos() const { return pos_; }
66 79
67 // Return the value of pos() returned before Advance() was last called. 80 // Return the value of pos() returned before Advance() was last called.
68 size_t prev() const { return prev_; } 81 size_t prev() const { return prev_; }
69 82
70 // Advance to the next break. Returns false if we've run past the end of 83 // Advance to the next break. Returns false if we've run past the end of
71 // the string. (Note that the very last "break" is after the final 84 // the string. (Note that the very last "break" is after the final
72 // character in the string, and when we advance to that position it's the 85 // character in the string, and when we advance to that position it's the
73 // last time Advance() returns true.) 86 // last time Advance() returns true.)
74 bool Advance(); 87 bool Advance();
75 88
76 // Under BREAK_WORD mode, returns true if the break we just hit is the 89 // Under BREAK_WORD mode, returns true if the break we just hit is the
77 // end of a word. (Otherwise, the break iterator just skipped over e.g. 90 // end of a word. (Otherwise, the break iterator just skipped over e.g.
78 // whitespace or punctuation.) Under BREAK_SPACE and BREAK_NEWLINE modes, 91 // whitespace or punctuation.) Under BREAK_LINE and BREAK_NEWLINE modes,
79 // this distinction doesn't apply and it always returns false. 92 // this distinction doesn't apply and it always returns false.
80 bool IsWord() const; 93 bool IsWord() const;
81 94
82 // Return the string between prev() and pos(). 95 // Return the string between prev() and pos().
83 // Advance() must have been called successfully at least once 96 // Advance() must have been called successfully at least once
84 // for pos() to have advanced to somewhere useful. 97 // for pos() to have advanced to somewhere useful.
85 string16 GetString() const; 98 string16 GetString() const;
86 99
87 private: 100 private:
88 // ICU iterator, avoiding ICU ubrk.h dependence. 101 // ICU iterator, avoiding ICU ubrk.h dependence.
(...skipping 10 matching lines...) Expand all
99 112
100 // Previous and current iterator positions. 113 // Previous and current iterator positions.
101 size_t prev_, pos_; 114 size_t prev_, pos_;
102 115
103 DISALLOW_COPY_AND_ASSIGN(BreakIterator); 116 DISALLOW_COPY_AND_ASSIGN(BreakIterator);
104 }; 117 };
105 118
106 } // namespace base 119 } // namespace base
107 120
108 #endif // BASE_I18N_BREAK_ITERATOR_H_ 121 #endif // BASE_I18N_BREAK_ITERATOR_H_
OLDNEW
« no previous file with comments | « no previous file | base/i18n/break_iterator.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698