| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/i18n/break_iterator.h" | |
| 6 | |
| 7 #include "base/logging.h" | |
| 8 #include "third_party/icu/source/common/unicode/ubrk.h" | |
| 9 #include "third_party/icu/source/common/unicode/uchar.h" | |
| 10 #include "third_party/icu/source/common/unicode/ustring.h" | |
| 11 | |
| 12 namespace base { | |
| 13 namespace i18n { | |
| 14 | |
// Sentinel meaning "no position": the largest value a size_t can hold.
// Used below as the initial prev_ and as pos_ once iteration is exhausted.
const size_t npos = ~static_cast<size_t>(0);
| 16 | |
| 17 BreakIterator::BreakIterator(const StringPiece16& str, BreakType break_type) | |
| 18 : iter_(NULL), | |
| 19 string_(str), | |
| 20 break_type_(break_type), | |
| 21 prev_(npos), | |
| 22 pos_(0) { | |
| 23 } | |
| 24 | |
| 25 BreakIterator::BreakIterator(const StringPiece16& str, const string16& rules) | |
| 26 : iter_(NULL), | |
| 27 string_(str), | |
| 28 rules_(rules), | |
| 29 break_type_(RULE_BASED), | |
| 30 prev_(npos), | |
| 31 pos_(0) { | |
| 32 } | |
| 33 | |
| 34 BreakIterator::~BreakIterator() { | |
| 35 if (iter_) | |
| 36 ubrk_close(static_cast<UBreakIterator*>(iter_)); | |
| 37 } | |
| 38 | |
| 39 bool BreakIterator::Init() { | |
| 40 UErrorCode status = U_ZERO_ERROR; | |
| 41 UParseError parse_error; | |
| 42 UBreakIteratorType break_type; | |
| 43 switch (break_type_) { | |
| 44 case BREAK_CHARACTER: | |
| 45 break_type = UBRK_CHARACTER; | |
| 46 break; | |
| 47 case BREAK_WORD: | |
| 48 break_type = UBRK_WORD; | |
| 49 break; | |
| 50 case BREAK_LINE: | |
| 51 case BREAK_NEWLINE: | |
| 52 case RULE_BASED: // (Keep compiler happy, break_type not used in this case) | |
| 53 break_type = UBRK_LINE; | |
| 54 break; | |
| 55 default: | |
| 56 NOTREACHED() << "invalid break_type_"; | |
| 57 return false; | |
| 58 } | |
| 59 if (break_type_ == RULE_BASED) { | |
| 60 iter_ = ubrk_openRules(rules_.c_str(), | |
| 61 static_cast<int32_t>(rules_.length()), | |
| 62 string_.data(), | |
| 63 static_cast<int32_t>(string_.size()), | |
| 64 &parse_error, | |
| 65 &status); | |
| 66 if (U_FAILURE(status)) { | |
| 67 NOTREACHED() << "ubrk_openRules failed to parse rule string at line " | |
| 68 << parse_error.line << ", offset " << parse_error.offset; | |
| 69 } | |
| 70 } else { | |
| 71 iter_ = ubrk_open(break_type, | |
| 72 NULL, | |
| 73 string_.data(), | |
| 74 static_cast<int32_t>(string_.size()), | |
| 75 &status); | |
| 76 if (U_FAILURE(status)) { | |
| 77 NOTREACHED() << "ubrk_open failed for type " << break_type | |
| 78 << " with error " << status; | |
| 79 } | |
| 80 } | |
| 81 | |
| 82 if (U_FAILURE(status)) { | |
| 83 return false; | |
| 84 } | |
| 85 | |
| 86 // Move the iterator to the beginning of the string. | |
| 87 ubrk_first(static_cast<UBreakIterator*>(iter_)); | |
| 88 return true; | |
| 89 } | |
| 90 | |
| 91 bool BreakIterator::Advance() { | |
| 92 int32_t pos; | |
| 93 int32_t status; | |
| 94 prev_ = pos_; | |
| 95 switch (break_type_) { | |
| 96 case BREAK_CHARACTER: | |
| 97 case BREAK_WORD: | |
| 98 case BREAK_LINE: | |
| 99 case RULE_BASED: | |
| 100 pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); | |
| 101 if (pos == UBRK_DONE) { | |
| 102 pos_ = npos; | |
| 103 return false; | |
| 104 } | |
| 105 pos_ = static_cast<size_t>(pos); | |
| 106 return true; | |
| 107 case BREAK_NEWLINE: | |
| 108 do { | |
| 109 pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); | |
| 110 if (pos == UBRK_DONE) | |
| 111 break; | |
| 112 pos_ = static_cast<size_t>(pos); | |
| 113 status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); | |
| 114 } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT); | |
| 115 if (pos == UBRK_DONE && prev_ == pos_) { | |
| 116 pos_ = npos; | |
| 117 return false; | |
| 118 } | |
| 119 return true; | |
| 120 default: | |
| 121 NOTREACHED() << "invalid break_type_"; | |
| 122 return false; | |
| 123 } | |
| 124 } | |
| 125 | |
| 126 bool BreakIterator::SetText(const base::char16* text, const size_t length) { | |
| 127 UErrorCode status = U_ZERO_ERROR; | |
| 128 ubrk_setText(static_cast<UBreakIterator*>(iter_), | |
| 129 text, length, &status); | |
| 130 pos_ = 0; // implicit when ubrk_setText is done | |
| 131 prev_ = npos; | |
| 132 if (U_FAILURE(status)) { | |
| 133 NOTREACHED() << "ubrk_setText failed"; | |
| 134 return false; | |
| 135 } | |
| 136 string_ = StringPiece16(text, length); | |
| 137 return true; | |
| 138 } | |
| 139 | |
| 140 bool BreakIterator::IsWord() const { | |
| 141 int32_t status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); | |
| 142 if (break_type_ != BREAK_WORD && break_type_ != RULE_BASED) | |
| 143 return false; | |
| 144 return status != UBRK_WORD_NONE; | |
| 145 } | |
| 146 | |
| 147 bool BreakIterator::IsEndOfWord(size_t position) const { | |
| 148 if (break_type_ != BREAK_WORD && break_type_ != RULE_BASED) | |
| 149 return false; | |
| 150 | |
| 151 UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); | |
| 152 UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position)); | |
| 153 int32_t status = ubrk_getRuleStatus(iter); | |
| 154 return (!!boundary && status != UBRK_WORD_NONE); | |
| 155 } | |
| 156 | |
| 157 bool BreakIterator::IsStartOfWord(size_t position) const { | |
| 158 if (break_type_ != BREAK_WORD && break_type_ != RULE_BASED) | |
| 159 return false; | |
| 160 | |
| 161 UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); | |
| 162 UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position)); | |
| 163 ubrk_next(iter); | |
| 164 int32_t next_status = ubrk_getRuleStatus(iter); | |
| 165 return (!!boundary && next_status != UBRK_WORD_NONE); | |
| 166 } | |
| 167 | |
| 168 bool BreakIterator::IsGraphemeBoundary(size_t position) const { | |
| 169 if (break_type_ != BREAK_CHARACTER) | |
| 170 return false; | |
| 171 | |
| 172 UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); | |
| 173 return !!ubrk_isBoundary(iter, static_cast<int32_t>(position)); | |
| 174 } | |
| 175 | |
| 176 string16 BreakIterator::GetString() const { | |
| 177 return GetStringPiece().as_string(); | |
| 178 } | |
| 179 | |
| 180 StringPiece16 BreakIterator::GetStringPiece() const { | |
| 181 DCHECK(prev_ != npos && pos_ != npos); | |
| 182 return string_.substr(prev_, pos_ - prev_); | |
| 183 } | |
| 184 | |
| 185 } // namespace i18n | |
| 186 } // namespace base | |
| OLD | NEW |