OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/i18n/break_iterator.h" | 5 #include "base/i18n/break_iterator.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "third_party/icu/source/common/unicode/ubrk.h" | 8 #include "third_party/icu/source/common/unicode/ubrk.h" |
9 #include "third_party/icu/source/common/unicode/uchar.h" | 9 #include "third_party/icu/source/common/unicode/uchar.h" |
10 #include "third_party/icu/source/common/unicode/ustring.h" | 10 #include "third_party/icu/source/common/unicode/ustring.h" |
11 | 11 |
12 namespace base { | 12 namespace base { |
13 namespace i18n { | 13 namespace i18n { |
14 | 14 |
// Sentinel meaning "no position": the largest value a size_t can hold.
// The explicit cast documents the intended signed-to-unsigned wrap-around
// and avoids sign-conversion warnings from the bare -1.
const size_t npos = static_cast<size_t>(-1);
16 | 16 |
17 BreakIterator::BreakIterator(const string16& str, BreakType break_type) | 17 BreakIterator::BreakIterator(const string16& str, BreakType break_type) |
18 : iter_(NULL), | 18 : iter_(NULL), |
19 string_(str), | 19 string_(str), |
20 break_type_(break_type), | 20 break_type_(break_type), |
21 prev_(npos), | 21 prev_(npos), |
22 pos_(0) { | 22 pos_(0) { |
23 } | 23 } |
24 | 24 |
| 25 BreakIterator::BreakIterator(const string16& str, const string16& rules) |
| 26 : iter_(NULL), |
| 27 string_(str), |
| 28 rules_(rules), |
| 29 break_type_(RULE_BASED), |
| 30 prev_(npos), |
| 31 pos_(0) { |
| 32 } |
| 33 |
25 BreakIterator::~BreakIterator() { | 34 BreakIterator::~BreakIterator() { |
26 if (iter_) | 35 if (iter_) |
27 ubrk_close(static_cast<UBreakIterator*>(iter_)); | 36 ubrk_close(static_cast<UBreakIterator*>(iter_)); |
28 } | 37 } |
29 | 38 |
30 bool BreakIterator::Init() { | 39 bool BreakIterator::Init() { |
31 UErrorCode status = U_ZERO_ERROR; | 40 UErrorCode status = U_ZERO_ERROR; |
| 41 UParseError parse_error; |
32 UBreakIteratorType break_type; | 42 UBreakIteratorType break_type; |
33 switch (break_type_) { | 43 switch (break_type_) { |
34 case BREAK_CHARACTER: | 44 case BREAK_CHARACTER: |
35 break_type = UBRK_CHARACTER; | 45 break_type = UBRK_CHARACTER; |
36 break; | 46 break; |
37 case BREAK_WORD: | 47 case BREAK_WORD: |
38 break_type = UBRK_WORD; | 48 break_type = UBRK_WORD; |
39 break; | 49 break; |
40 case BREAK_LINE: | 50 case BREAK_LINE: |
41 case BREAK_NEWLINE: | 51 case BREAK_NEWLINE: |
| 52 case RULE_BASED: // (Keep compiler happy, break_type not used in this case) |
42 break_type = UBRK_LINE; | 53 break_type = UBRK_LINE; |
43 break; | 54 break; |
44 default: | 55 default: |
45 NOTREACHED() << "invalid break_type_"; | 56 NOTREACHED() << "invalid break_type_"; |
46 return false; | 57 return false; |
47 } | 58 } |
48 iter_ = ubrk_open(break_type, NULL, | 59 if (break_type_ == RULE_BASED) { |
49 string_.data(), static_cast<int32_t>(string_.size()), | 60 iter_ = ubrk_openRules(rules_.c_str(), |
50 &status); | 61 static_cast<int32_t>(rules_.length()), |
| 62 string_.data(), |
| 63 static_cast<int32_t>(string_.size()), |
| 64 &parse_error, |
| 65 &status); |
| 66 if (U_FAILURE(status)) { |
| 67 NOTREACHED() << "ubrk_openRules failed to parse rule string at line " |
| 68 << parse_error.line << ", offset " << parse_error.offset; |
| 69 } |
| 70 } else { |
| 71 iter_ = ubrk_open(break_type, |
| 72 NULL, |
| 73 string_.data(), |
| 74 static_cast<int32_t>(string_.size()), |
| 75 &status); |
| 76 if (U_FAILURE(status)) { |
| 77 NOTREACHED() << "ubrk_open failed"; |
| 78 } |
| 79 } |
| 80 |
51 if (U_FAILURE(status)) { | 81 if (U_FAILURE(status)) { |
52 NOTREACHED() << "ubrk_open failed"; | |
53 return false; | 82 return false; |
54 } | 83 } |
| 84 |
55 // Move the iterator to the beginning of the string. | 85 // Move the iterator to the beginning of the string. |
56 ubrk_first(static_cast<UBreakIterator*>(iter_)); | 86 ubrk_first(static_cast<UBreakIterator*>(iter_)); |
57 return true; | 87 return true; |
58 } | 88 } |
59 | 89 |
60 bool BreakIterator::Advance() { | 90 bool BreakIterator::Advance() { |
61 int32_t pos; | 91 int32_t pos; |
62 int32_t status; | 92 int32_t status; |
63 prev_ = pos_; | 93 prev_ = pos_; |
64 switch (break_type_) { | 94 switch (break_type_) { |
65 case BREAK_CHARACTER: | 95 case BREAK_CHARACTER: |
66 case BREAK_WORD: | 96 case BREAK_WORD: |
67 case BREAK_LINE: | 97 case BREAK_LINE: |
| 98 case RULE_BASED: |
68 pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); | 99 pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); |
69 if (pos == UBRK_DONE) { | 100 if (pos == UBRK_DONE) { |
70 pos_ = npos; | 101 pos_ = npos; |
71 return false; | 102 return false; |
72 } | 103 } |
73 pos_ = static_cast<size_t>(pos); | 104 pos_ = static_cast<size_t>(pos); |
74 return true; | 105 return true; |
75 case BREAK_NEWLINE: | 106 case BREAK_NEWLINE: |
76 do { | 107 do { |
77 pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); | 108 pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); |
78 if (pos == UBRK_DONE) | 109 if (pos == UBRK_DONE) |
79 break; | 110 break; |
80 pos_ = static_cast<size_t>(pos); | 111 pos_ = static_cast<size_t>(pos); |
81 status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); | 112 status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); |
82 } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT); | 113 } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT); |
83 if (pos == UBRK_DONE && prev_ == pos_) { | 114 if (pos == UBRK_DONE && prev_ == pos_) { |
84 pos_ = npos; | 115 pos_ = npos; |
85 return false; | 116 return false; |
86 } | 117 } |
87 return true; | 118 return true; |
88 default: | 119 default: |
89 NOTREACHED() << "invalid break_type_"; | 120 NOTREACHED() << "invalid break_type_"; |
90 return false; | 121 return false; |
91 } | 122 } |
92 } | 123 } |
93 | 124 |
| 125 bool BreakIterator::SetText(const base::char16* text, const size_t length) { |
| 126 UErrorCode status = U_ZERO_ERROR; |
| 127 ubrk_setText(static_cast<UBreakIterator*>(iter_), |
| 128 text, length, &status); |
| 129 pos_ = 0; // implicit when ubrk_setText is done |
| 130 prev_ = npos; |
| 131 if (U_FAILURE(status)) { |
| 132 NOTREACHED() << "ubrk_setText failed"; |
| 133 return false; |
| 134 } |
| 135 return true; |
| 136 } |
| 137 |
94 bool BreakIterator::IsWord() const { | 138 bool BreakIterator::IsWord() const { |
95 int32_t status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); | 139 int32_t status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); |
96 return (break_type_ == BREAK_WORD && status != UBRK_WORD_NONE); | 140 if (break_type_ != BREAK_WORD && break_type_ != RULE_BASED) |
| 141 return false; |
| 142 return status != UBRK_WORD_NONE; |
97 } | 143 } |
98 | 144 |
99 bool BreakIterator::IsEndOfWord(size_t position) const { | 145 bool BreakIterator::IsEndOfWord(size_t position) const { |
100 if (break_type_ != BREAK_WORD) | 146 if (break_type_ != BREAK_WORD && break_type_ != RULE_BASED) |
101 return false; | 147 return false; |
102 | 148 |
103 UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); | 149 UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); |
104 UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position)); | 150 UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position)); |
105 int32_t status = ubrk_getRuleStatus(iter); | 151 int32_t status = ubrk_getRuleStatus(iter); |
106 return (!!boundary && status != UBRK_WORD_NONE); | 152 return (!!boundary && status != UBRK_WORD_NONE); |
107 } | 153 } |
108 | 154 |
109 bool BreakIterator::IsStartOfWord(size_t position) const { | 155 bool BreakIterator::IsStartOfWord(size_t position) const { |
110 if (break_type_ != BREAK_WORD) | 156 if (break_type_ != BREAK_WORD && break_type_ != RULE_BASED) |
111 return false; | 157 return false; |
112 | 158 |
113 UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); | 159 UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); |
114 UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position)); | 160 UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position)); |
115 ubrk_next(iter); | 161 ubrk_next(iter); |
116 int32_t next_status = ubrk_getRuleStatus(iter); | 162 int32_t next_status = ubrk_getRuleStatus(iter); |
117 return (!!boundary && next_status != UBRK_WORD_NONE); | 163 return (!!boundary && next_status != UBRK_WORD_NONE); |
118 } | 164 } |
119 | 165 |
120 string16 BreakIterator::GetString() const { | 166 string16 BreakIterator::GetString() const { |
121 DCHECK(prev_ != npos && pos_ != npos); | 167 DCHECK(prev_ != npos && pos_ != npos); |
122 return string_.substr(prev_, pos_ - prev_); | 168 return string_.substr(prev_, pos_ - prev_); |
123 } | 169 } |
124 | 170 |
125 } // namespace i18n | 171 } // namespace i18n |
126 } // namespace base | 172 } // namespace base |
OLD | NEW |