Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(158)

Side by Side Diff: source/i18n/coleitr.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/chnsecal.cpp ('k') | source/i18n/coll.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and * 3 * Copyright (C) 1996-2014, International Business Machines Corporation and
4 * others. All Rights Reserved. * 4 * others. All Rights Reserved.
5 ******************************************************************************* 5 *******************************************************************************
6 */ 6 */
7 7
8 /* 8 /*
9 * File coleitr.cpp 9 * File coleitr.cpp
10 * 10 *
11 *
12 *
13 * Created by: Helena Shih 11 * Created by: Helena Shih
14 * 12 *
15 * Modification History: 13 * Modification History:
16 * 14 *
17 * Date Name Description 15 * Date Name Description
18 * 16 *
19 * 6/23/97 helena Adding comments to make code more readable. 17 * 6/23/97 helena Adding comments to make code more readable.
20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java 18 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21 * 12/10/99 aliu Ported Thai collation support from Java. 19 * 12/10/99 aliu Ported Thai collation support from Java.
22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h) 20 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01 swquek Removed CollationElementsIterator() since it is 21 * 02/19/01 swquek Removed CollationElementIterator() since it is
24 * private constructor and no calls are made to it 22 * private constructor and no calls are made to it
23 * 2012-2014 markus Rewritten in C++ again.
25 */ 24 */
26 25
27 #include "unicode/utypes.h" 26 #include "unicode/utypes.h"
28 27
29 #if !UCONFIG_NO_COLLATION 28 #if !UCONFIG_NO_COLLATION
30 29
31 #include "unicode/coleitr.h" 30 #include "unicode/coleitr.h"
31 #include "unicode/tblcoll.h"
32 #include "unicode/ustring.h" 32 #include "unicode/ustring.h"
33 #include "ucol_imp.h" 33 #include "cmemory.h"
34 #include "collation.h"
35 #include "collationdata.h"
36 #include "collationiterator.h"
37 #include "collationsets.h"
38 #include "collationtailoring.h"
34 #include "uassert.h" 39 #include "uassert.h"
35 #include "cmemory.h" 40 #include "uhash.h"
36 41 #include "utf16collationiterator.h"
42 #include "uvectr32.h"
37 43
38 /* Constants --------------------------------------------------------------- */ 44 /* Constants --------------------------------------------------------------- */
39 45
40 U_NAMESPACE_BEGIN 46 U_NAMESPACE_BEGIN
41 47
42 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator) 48 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
43 49
44 /* CollationElementIterator public constructor/destructor ------------------ */ 50 /* CollationElementIterator public constructor/destructor ------------------ */
45 51
46 CollationElementIterator::CollationElementIterator( 52 CollationElementIterator::CollationElementIterator(
47 const CollationElementIterator& other) 53 const CollationElementIterator& other)
48 : UObject(other), isDataOwned_(TRUE) 54 : UObject(other), iter_(NULL), rbc_(NULL), otherHalf_(0), dir_(0), offse ts_(NULL) {
49 {
50 UErrorCode status = U_ZERO_ERROR;
51 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
52 &status);
53
54 *this = other; 55 *this = other;
55 } 56 }
56 57
57 CollationElementIterator::~CollationElementIterator() 58 CollationElementIterator::~CollationElementIterator()
58 { 59 {
59 if (isDataOwned_) { 60 delete iter_;
60 ucol_closeElements(m_data_); 61 delete offsets_;
61 }
62 } 62 }
63 63
64 /* CollationElementIterator public methods --------------------------------- */ 64 /* CollationElementIterator public methods --------------------------------- */
65 65
66 namespace {
67
68 uint32_t getFirstHalf(uint32_t p, uint32_t lower32) {
69 return (p & 0xffff0000) | ((lower32 >> 16) & 0xff00) | ((lower32 >> 8) & 0xf f);
70 }
71 uint32_t getSecondHalf(uint32_t p, uint32_t lower32) {
72 return (p << 16) | ((lower32 >> 8) & 0xff00) | (lower32 & 0x3f);
73 }
74 UBool ceNeedsTwoParts(int64_t ce) {
75 return (ce & INT64_C(0xffff00ff003f)) != 0;
76 }
77
78 } // namespace
79
66 int32_t CollationElementIterator::getOffset() const 80 int32_t CollationElementIterator::getOffset() const
67 { 81 {
68 return ucol_getOffset(m_data_); 82 if (dir_ < 0 && offsets_ != NULL && !offsets_->isEmpty()) {
83 // CollationIterator::previousCE() decrements the CEs length
84 // while it pops CEs from its internal buffer.
85 int32_t i = iter_->getCEsLength();
86 if (otherHalf_ != 0) {
87 // Return the trailing CE offset while we are in the middle of a 64-bit CE.
88 ++i;
89 }
90 U_ASSERT(i < offsets_->size());
91 return offsets_->elementAti(i);
92 }
93 return iter_->getOffset();
69 } 94 }
70 95
71 /** 96 /**
72 * Get the ordering priority of the next character in the string. 97 * Get the ordering priority of the next character in the string.
73 * @return the next character's ordering. Returns NULLORDER if an error has 98 * @return the next character's ordering. Returns NULLORDER if an error has
74 * occurred or if the end of string has been reached 99 * occurred or if the end of string has been reached
75 */ 100 */
76 int32_t CollationElementIterator::next(UErrorCode& status) 101 int32_t CollationElementIterator::next(UErrorCode& status)
77 { 102 {
78 return ucol_next(m_data_, &status); 103 if (U_FAILURE(status)) { return NULLORDER; }
104 if (dir_ > 1) {
105 // Continue forward iteration. Test this first.
106 if (otherHalf_ != 0) {
107 uint32_t oh = otherHalf_;
108 otherHalf_ = 0;
109 return oh;
110 }
111 } else if (dir_ == 1) {
112 // next() after setOffset()
113 dir_ = 2;
114 } else if (dir_ == 0) {
115 // The iter_ is already reset to the start of the text.
116 dir_ = 2;
117 } else /* dir_ < 0 */ {
118 // illegal change of direction
119 status = U_INVALID_STATE_ERROR;
120 return NULLORDER;
121 }
122 // No need to keep all CEs in the buffer when we iterate.
123 iter_->clearCEsIfNoneRemaining();
124 int64_t ce = iter_->nextCE(status);
125 if (ce == Collation::NO_CE) { return NULLORDER; }
126 // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
127 uint32_t p = (uint32_t)(ce >> 32);
128 uint32_t lower32 = (uint32_t)ce;
129 uint32_t firstHalf = getFirstHalf(p, lower32);
130 uint32_t secondHalf = getSecondHalf(p, lower32);
131 if (secondHalf != 0) {
132 otherHalf_ = secondHalf | 0xc0; // continuation CE
133 }
134 return firstHalf;
79 } 135 }
80 136
81 UBool CollationElementIterator::operator!=( 137 UBool CollationElementIterator::operator!=(
82 const CollationElementIterator& other) const 138 const CollationElementIterator& other) const
83 { 139 {
84 return !(*this == other); 140 return !(*this == other);
85 } 141 }
86 142
87 UBool CollationElementIterator::operator==( 143 UBool CollationElementIterator::operator==(
88 const CollationElementIterator& that) const 144 const CollationElementIterator& that) const
89 { 145 {
90 if (this == &that || m_data_ == that.m_data_) { 146 if (this == &that) {
91 return TRUE; 147 return TRUE;
92 } 148 }
93 149
94 // option comparison 150 return
95 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll) 151 (rbc_ == that.rbc_ || *rbc_ == *that.rbc_) &&
96 { 152 otherHalf_ == that.otherHalf_ &&
97 return FALSE; 153 normalizeDir() == that.normalizeDir() &&
98 } 154 string_ == that.string_ &&
99 155 *iter_ == *that.iter_;
100 // the constructor and setText always sets a length
101 // and we only compare the string not the contents of the normalization
102 // buffer
103 int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_. string);
104 int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iter atordata_.string);
105
106 if (thislength != thatlength) {
107 return FALSE;
108 }
109
110 if (uprv_memcmp(m_data_->iteratordata_.string,
111 that.m_data_->iteratordata_.string,
112 thislength * U_SIZEOF_UCHAR) != 0) {
113 return FALSE;
114 }
115 if (getOffset() != that.getOffset()) {
116 return FALSE;
117 }
118
119 // checking normalization buffer
120 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
121 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
122 return FALSE;
123 }
124 // both are in the normalization buffer
125 if (m_data_->iteratordata_.pos
126 - m_data_->iteratordata_.writableBuffer.getBuffer()
127 != that.m_data_->iteratordata_.pos
128 - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
129 // not in the same position in the normalization buffer
130 return FALSE;
131 }
132 }
133 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
134 return FALSE;
135 }
136 // checking ce position
137 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
138 == (that.m_data_->iteratordata_.CEpos
139 - that.m_data_->iteratordata_.CEs);
140 } 156 }
141 157
142 /** 158 /**
143 * Get the ordering priority of the previous collation element in the string. 159 * Get the ordering priority of the previous collation element in the string.
144 * @param status the error code status. 160 * @param status the error code status.
145 * @return the previous element's ordering. Returns NULLORDER if an error has 161 * @return the previous element's ordering. Returns NULLORDER if an error has
146 * occurred or if the start of string has been reached. 162 * occurred or if the start of string has been reached.
147 */ 163 */
148 int32_t CollationElementIterator::previous(UErrorCode& status) 164 int32_t CollationElementIterator::previous(UErrorCode& status)
149 { 165 {
150 return ucol_previous(m_data_, &status); 166 if (U_FAILURE(status)) { return NULLORDER; }
167 if (dir_ < 0) {
168 // Continue backwards iteration. Test this first.
169 if (otherHalf_ != 0) {
170 uint32_t oh = otherHalf_;
171 otherHalf_ = 0;
172 return oh;
173 }
174 } else if (dir_ == 0) {
175 iter_->resetToOffset(string_.length());
176 dir_ = -1;
177 } else if (dir_ == 1) {
178 // previous() after setOffset()
179 dir_ = -1;
180 } else /* dir_ > 1 */ {
181 // illegal change of direction
182 status = U_INVALID_STATE_ERROR;
183 return NULLORDER;
184 }
185 if (offsets_ == NULL) {
186 offsets_ = new UVector32(status);
187 if (offsets_ == NULL) {
188 status = U_MEMORY_ALLOCATION_ERROR;
189 return NULLORDER;
190 }
191 }
192 // If we already have expansion CEs, then we also have offsets.
193 // Otherwise remember the trailing offset in case we need to
194 // write offsets for an artificial expansion.
195 int32_t limitOffset = iter_->getCEsLength() == 0 ? iter_->getOffset() : 0;
196 int64_t ce = iter_->previousCE(*offsets_, status);
197 if (ce == Collation::NO_CE) { return NULLORDER; }
198 // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
199 uint32_t p = (uint32_t)(ce >> 32);
200 uint32_t lower32 = (uint32_t)ce;
201 uint32_t firstHalf = getFirstHalf(p, lower32);
202 uint32_t secondHalf = getSecondHalf(p, lower32);
203 if (secondHalf != 0) {
204 if (offsets_->isEmpty()) {
205 // When we convert a single 64-bit CE into two 32-bit CEs,
206 // we need to make this artificial expansion behave like a normal expansion.
207 // See CollationIterator::previousCE().
208 offsets_->addElement(iter_->getOffset(), status);
209 offsets_->addElement(limitOffset, status);
210 }
211 otherHalf_ = firstHalf;
212 return secondHalf | 0xc0; // continuation CE
213 }
214 return firstHalf;
151 } 215 }
152 216
153 /** 217 /**
154 * Resets the cursor to the beginning of the string. 218 * Resets the cursor to the beginning of the string.
155 */ 219 */
156 void CollationElementIterator::reset() 220 void CollationElementIterator::reset()
157 { 221 {
158 ucol_reset(m_data_); 222 iter_ ->resetToOffset(0);
223 otherHalf_ = 0;
224 dir_ = 0;
159 } 225 }
160 226
161 void CollationElementIterator::setOffset(int32_t newOffset, 227 void CollationElementIterator::setOffset(int32_t newOffset,
162 UErrorCode& status) 228 UErrorCode& status)
163 { 229 {
164 ucol_setOffset(m_data_, newOffset, &status); 230 if (U_FAILURE(status)) { return; }
231 if (0 < newOffset && newOffset < string_.length()) {
232 int32_t offset = newOffset;
233 do {
234 UChar c = string_.charAt(offset);
235 if (!rbc_->isUnsafe(c) ||
236 (U16_IS_LEAD(c) && !rbc_->isUnsafe(string_.char32At(offset)) )) {
237 break;
238 }
239 // Back up to before this unsafe character.
240 --offset;
241 } while (offset > 0);
242 if (offset < newOffset) {
243 // We might have backed up more than necessary.
244 // For example, contractions "ch" and "cu" make both 'h' and 'u' uns afe,
245 // but for text "chu" setOffset(2) should remain at 2
246 // although we initially back up to offset 0.
247 // Find the last safe offset no greater than newOffset by iterating forward.
248 int32_t lastSafeOffset = offset;
249 do {
250 iter_->resetToOffset(lastSafeOffset);
251 do {
252 iter_->nextCE(status);
253 if (U_FAILURE(status)) { return; }
254 } while ((offset = iter_->getOffset()) == lastSafeOffset);
255 if (offset <= newOffset) {
256 lastSafeOffset = offset;
257 }
258 } while (offset < newOffset);
259 newOffset = lastSafeOffset;
260 }
261 }
262 iter_->resetToOffset(newOffset);
263 otherHalf_ = 0;
264 dir_ = 1;
165 } 265 }
166 266
167 /** 267 /**
168 * Sets the source to the new source string. 268 * Sets the source to the new source string.
169 */ 269 */
170 void CollationElementIterator::setText(const UnicodeString& source, 270 void CollationElementIterator::setText(const UnicodeString& source,
171 UErrorCode& status) 271 UErrorCode& status)
172 { 272 {
173 if (U_FAILURE(status)) { 273 if (U_FAILURE(status)) {
174 return; 274 return;
175 } 275 }
176 276
177 int32_t length = source.length(); 277 string_ = source;
178 UChar *string = NULL; 278 const UChar *s = string_.getBuffer();
179 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { 279 CollationIterator *newIter;
180 uprv_free((UChar *)m_data_->iteratordata_.string); 280 UBool numeric = rbc_->settings->isNumeric();
281 if (rbc_->settings->dontCheckFCD()) {
282 newIter = new UTF16CollationIterator(rbc_->data, numeric, s, s, s + stri ng_.length());
283 } else {
284 newIter = new FCDUTF16CollationIterator(rbc_->data, numeric, s, s, s + s tring_.length());
181 } 285 }
182 m_data_->isWritable = TRUE; 286 if (newIter == NULL) {
183 if (length > 0) { 287 status = U_MEMORY_ALLOCATION_ERROR;
184 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 288 return;
185 /* test for NULL */
186 if (string == NULL) {
187 status = U_MEMORY_ALLOCATION_ERROR;
188 return;
189 }
190 u_memcpy(string, source.getBuffer(), length);
191 } 289 }
192 else { 290 delete iter_;
193 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 291 iter_ = newIter;
194 /* test for NULL */ 292 otherHalf_ = 0;
195 if (string == NULL) { 293 dir_ = 0;
196 status = U_MEMORY_ALLOCATION_ERROR;
197 return;
198 }
199 *string = 0;
200 }
201 /* Free offsetBuffer before initializing it. */
202 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
203 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
204 &m_data_->iteratordata_, &status);
205
206 m_data_->reset_ = TRUE;
207 } 294 }
208 295
209 // Sets the source to the new character iterator. 296 // Sets the source to the new character iterator.
210 void CollationElementIterator::setText(CharacterIterator& source, 297 void CollationElementIterator::setText(CharacterIterator& source,
211 UErrorCode& status) 298 UErrorCode& status)
212 { 299 {
213 if (U_FAILURE(status)) 300 if (U_FAILURE(status))
214 return; 301 return;
215 302
216 int32_t length = source.getLength(); 303 source.getText(string_);
217 UChar *buffer = NULL; 304 setText(string_, status);
218
219 if (length == 0) {
220 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
221 /* test for NULL */
222 if (buffer == NULL) {
223 status = U_MEMORY_ALLOCATION_ERROR;
224 return;
225 }
226 *buffer = 0;
227 }
228 else {
229 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
230 /* test for NULL */
231 if (buffer == NULL) {
232 status = U_MEMORY_ALLOCATION_ERROR;
233 return;
234 }
235 /*
236 Using this constructor will prevent buffer from being removed when
237 string gets removed
238 */
239 UnicodeString string;
240 source.getText(string);
241 u_memcpy(buffer, string.getBuffer(), length);
242 }
243
244 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
245 uprv_free((UChar *)m_data_->iteratordata_.string);
246 }
247 m_data_->isWritable = TRUE;
248 /* Free offsetBuffer before initializing it. */
249 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
250 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
251 &m_data_->iteratordata_, &status);
252 m_data_->reset_ = TRUE;
253 } 305 }
254 306
255 int32_t CollationElementIterator::strengthOrder(int32_t order) const 307 int32_t CollationElementIterator::strengthOrder(int32_t order) const
256 { 308 {
257 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll); 309 UColAttributeValue s = (UColAttributeValue)rbc_->settings->getStrength();
258 // Mask off the unwanted differences. 310 // Mask off the unwanted differences.
259 if (s == UCOL_PRIMARY) { 311 if (s == UCOL_PRIMARY) {
260 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY; 312 order &= 0xffff0000;
261 } 313 }
262 else if (s == UCOL_SECONDARY) { 314 else if (s == UCOL_SECONDARY) {
263 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY; 315 order &= 0xffffff00;
264 } 316 }
265 317
266 return order; 318 return order;
267 } 319 }
268 320
269 /* CollationElementIterator private constructors/destructors --------------- */ 321 /* CollationElementIterator private constructors/destructors --------------- */
270 322
271 /** 323 /**
272 * This is the "real" constructor for this class; it constructs an iterator 324 * This is the "real" constructor for this class; it constructs an iterator
273 * over the source text using the specified collator 325 * over the source text using the specified collator
274 */ 326 */
275 CollationElementIterator::CollationElementIterator( 327 CollationElementIterator::CollationElementIterator(
276 const UnicodeString& sourceText, 328 const UnicodeString &source,
277 const RuleBasedCollator* order, 329 const RuleBasedCollator *coll,
278 UErrorCode& status) 330 UErrorCode &status)
279 : isDataOwned_(TRUE) 331 : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {
280 { 332 setText(source, status);
281 if (U_FAILURE(status)) {
282 return;
283 }
284
285 int32_t length = sourceText.length();
286 UChar *string = NULL;
287
288 if (length > 0) {
289 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
290 /* test for NULL */
291 if (string == NULL) {
292 status = U_MEMORY_ALLOCATION_ERROR;
293 return;
294 }
295 /*
296 Using this constructor will prevent buffer from being removed when
297 string gets removed
298 */
299 u_memcpy(string, sourceText.getBuffer(), length);
300 }
301 else {
302 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
303 /* test for NULL */
304 if (string == NULL) {
305 status = U_MEMORY_ALLOCATION_ERROR;
306 return;
307 }
308 *string = 0;
309 }
310 m_data_ = ucol_openElements(order->ucollator, string, length, &status);
311
312 /* Test for buffer overflows */
313 if (U_FAILURE(status)) {
314 return;
315 }
316 m_data_->isWritable = TRUE;
317 } 333 }
318 334
319 /** 335 /**
320 * This is the "real" constructor for this class; it constructs an iterator over 336 * This is the "real" constructor for this class; it constructs an iterator over
321 * the source text using the specified collator 337 * the source text using the specified collator
322 */ 338 */
323 CollationElementIterator::CollationElementIterator( 339 CollationElementIterator::CollationElementIterator(
324 const CharacterIterator& sourceText, 340 const CharacterIterator &source,
325 const RuleBasedCollator* order, 341 const RuleBasedCollator *coll,
326 UErrorCode& status) 342 UErrorCode &status)
327 : isDataOwned_(TRUE) 343 : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {
328 { 344 // We only call source.getText() which should be const anyway.
329 if (U_FAILURE(status)) 345 setText(const_cast<CharacterIterator &>(source), status);
330 return;
331
332 // **** should I just drop this test? ****
333 /*
334 if ( sourceText.endIndex() != 0 )
335 {
336 // A CollationElementIterator is really a two-layered beast.
337 // Internally it uses a Normalizer to munge the source text into a form
338 // where all "composed" Unicode characters (such as \u00FC) are split into a
339 // normal character and a combining accent character.
340 // Afterward, CollationElementIterator does its own processing to handle
341 // expanding and contracting collation sequences, ignorables, and so on.
342
343 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
344 ? Normalizer::NO_OP : order->getDecomposition();
345
346 text = new Normalizer(sourceText, decomp);
347 if (text == NULL)
348 status = U_MEMORY_ALLOCATION_ERROR;
349 }
350 */
351 int32_t length = sourceText.getLength();
352 UChar *buffer;
353 if (length > 0) {
354 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
355 /* test for NULL */
356 if (buffer == NULL) {
357 status = U_MEMORY_ALLOCATION_ERROR;
358 return;
359 }
360 /*
361 Using this constructor will prevent buffer from being removed when
362 string gets removed
363 */
364 UnicodeString string(buffer, length, length);
365 ((CharacterIterator &)sourceText).getText(string);
366 const UChar *temp = string.getBuffer();
367 u_memcpy(buffer, temp, length);
368 }
369 else {
370 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
371 /* test for NULL */
372 if (buffer == NULL) {
373 status = U_MEMORY_ALLOCATION_ERROR;
374 return;
375 }
376 *buffer = 0;
377 }
378 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
379
380 /* Test for buffer overflows */
381 if (U_FAILURE(status)) {
382 return;
383 }
384 m_data_->isWritable = TRUE;
385 } 346 }
386 347
387 /* CollationElementIterator protected methods ----------------------------- */ 348 /* CollationElementIterator private methods -------------------------------- */
388 349
389 const CollationElementIterator& CollationElementIterator::operator=( 350 const CollationElementIterator& CollationElementIterator::operator=(
390 const CollationElementIterator& other) 351 const CollationElementIterator& other)
391 { 352 {
392 if (this != &other) 353 if (this == &other) {
393 { 354 return *this;
394 UCollationElements *ucolelem = this->m_data_;
395 UCollationElements *otherucolelem = other.m_data_;
396 collIterate *coliter = &(ucolelem->iteratordata_);
397 collIterate *othercoliter = &(otherucolelem->iteratordata_);
398 int length = 0;
399
400 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
401 // the normalization buffer
402 length = (int)(othercoliter->endp - othercoliter->string);
403
404 ucolelem->reset_ = otherucolelem->reset_;
405 ucolelem->isWritable = TRUE;
406
407 /* create a duplicate of string */
408 if (length > 0) {
409 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
410 if(coliter->string != NULL) {
411 uprv_memcpy((UChar *)coliter->string, othercoliter->string,
412 length * U_SIZEOF_UCHAR);
413 } else { // Error: couldn't allocate memory. No copying should be do ne
414 length = 0;
415 }
416 }
417 else {
418 coliter->string = NULL;
419 }
420
421 /* start and end of string */
422 coliter->endp = coliter->string == NULL ? NULL : coliter->string + lengt h;
423
424 /* handle writable buffer here */
425
426 if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
427 coliter->writableBuffer = othercoliter->writableBuffer;
428 coliter->writableBuffer.getTerminatedBuffer();
429 }
430
431 /* current position */
432 if (othercoliter->pos >= othercoliter->string &&
433 othercoliter->pos <= othercoliter->endp)
434 {
435 U_ASSERT(coliter->string != NULL);
436 coliter->pos = coliter->string +
437 (othercoliter->pos - othercoliter->string);
438 }
439 else {
440 coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
441 (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
442 }
443
444 /* CE buffer */
445 int32_t CEsize;
446 if (coliter->extendCEs) {
447 uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL _EXPAND_CE_BUFFER_SIZE);
448 CEsize = sizeof(othercoliter->extendCEs);
449 if (CEsize > 0) {
450 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
451 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize) ;
452 }
453 coliter->toReturn = coliter->extendCEs +
454 (othercoliter->toReturn - othercoliter->extendCEs);
455 coliter->CEpos = coliter->extendCEs + CEsize;
456 } else {
457 CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
458 if (CEsize > 0) {
459 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
460 }
461 coliter->toReturn = coliter->CEs +
462 (othercoliter->toReturn - othercoliter->CEs);
463 coliter->CEpos = coliter->CEs + CEsize;
464 }
465
466 if (othercoliter->fcdPosition != NULL) {
467 U_ASSERT(coliter->string != NULL);
468 coliter->fcdPosition = coliter->string +
469 (othercoliter->fcdPosition
470 - othercoliter->string);
471 }
472 else {
473 coliter->fcdPosition = NULL;
474 }
475 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
476 coliter->origFlags = othercoliter->origFlags;
477 coliter->coll = othercoliter->coll;
478 this->isDataOwned_ = TRUE;
479 } 355 }
480 356
357 CollationIterator *newIter;
358 const FCDUTF16CollationIterator *otherFCDIter =
359 dynamic_cast<const FCDUTF16CollationIterator *>(other.iter_);
360 if(otherFCDIter != NULL) {
361 newIter = new FCDUTF16CollationIterator(*otherFCDIter, string_.getBuffer ());
362 } else {
363 const UTF16CollationIterator *otherIter =
364 dynamic_cast<const UTF16CollationIterator *>(other.iter_);
365 if(otherIter != NULL) {
366 newIter = new UTF16CollationIterator(*otherIter, string_.getBuffer() );
367 } else {
368 newIter = NULL;
369 }
370 }
371 if(newIter != NULL) {
372 delete iter_;
373 iter_ = newIter;
374 rbc_ = other.rbc_;
375 otherHalf_ = other.otherHalf_;
376 dir_ = other.dir_;
377
378 string_ = other.string_;
379 }
380 if(other.dir_ < 0 && other.offsets_ != NULL && !other.offsets_->isEmpty()) {
381 UErrorCode errorCode = U_ZERO_ERROR;
382 if(offsets_ == NULL) {
383 offsets_ = new UVector32(other.offsets_->size(), errorCode);
384 }
385 if(offsets_ != NULL) {
386 offsets_->assign(*other.offsets_, errorCode);
387 }
388 }
481 return *this; 389 return *this;
482 } 390 }
483 391
392 namespace {
393
394 class MaxExpSink : public ContractionsAndExpansions::CESink {
395 public:
396 MaxExpSink(UHashtable *h, UErrorCode &ec) : maxExpansions(h), errorCode(ec) {}
397 virtual ~MaxExpSink();
398 virtual void handleCE(int64_t /*ce*/) {}
399 virtual void handleExpansion(const int64_t ces[], int32_t length) {
400 if (length <= 1) {
401 // We do not need to add single CEs into the map.
402 return;
403 }
404 int32_t count = 0; // number of CE "halves"
405 for (int32_t i = 0; i < length; ++i) {
406 count += ceNeedsTwoParts(ces[i]) ? 2 : 1;
407 }
408 // last "half" of the last CE
409 int64_t ce = ces[length - 1];
410 uint32_t p = (uint32_t)(ce >> 32);
411 uint32_t lower32 = (uint32_t)ce;
412 uint32_t lastHalf = getSecondHalf(p, lower32);
413 if (lastHalf == 0) {
414 lastHalf = getFirstHalf(p, lower32);
415 U_ASSERT(lastHalf != 0);
416 } else {
417 lastHalf |= 0xc0; // old-style continuation CE
418 }
419 if (count > uhash_igeti(maxExpansions, (int32_t)lastHalf)) {
420 uhash_iputi(maxExpansions, (int32_t)lastHalf, count, &errorCode);
421 }
422 }
423
424 private:
425 UHashtable *maxExpansions;
426 UErrorCode &errorCode;
427 };
428
429 MaxExpSink::~MaxExpSink() {}
430
431 } // namespace
432
433 UHashtable *
434 CollationElementIterator::computeMaxExpansions(const CollationData *data, UError Code &errorCode) {
435 if (U_FAILURE(errorCode)) { return NULL; }
436 UHashtable *maxExpansions = uhash_open(uhash_hashLong, uhash_compareLong,
437 uhash_compareLong, &errorCode);
438 if (U_FAILURE(errorCode)) { return NULL; }
439 MaxExpSink sink(maxExpansions, errorCode);
440 ContractionsAndExpansions(NULL, NULL, &sink, TRUE).forData(data, errorCode);
441 if (U_FAILURE(errorCode)) {
442 uhash_close(maxExpansions);
443 return NULL;
444 }
445 return maxExpansions;
446 }
447
448 int32_t
449 CollationElementIterator::getMaxExpansion(int32_t order) const {
450 return getMaxExpansion(rbc_->tailoring->maxExpansions, order);
451 }
452
453 int32_t
454 CollationElementIterator::getMaxExpansion(const UHashtable *maxExpansions, int32 _t order) {
455 if (order == 0) { return 1; }
456 int32_t max;
457 if(maxExpansions != NULL && (max = uhash_igeti(maxExpansions, order)) != 0) {
458 return max;
459 }
460 if ((order & 0xc0) == 0xc0) {
461 // old-style continuation CE
462 return 2;
463 } else {
464 return 1;
465 }
466 }
467
484 U_NAMESPACE_END 468 U_NAMESPACE_END
485 469
486 #endif /* #if !UCONFIG_NO_COLLATION */ 470 #endif /* #if !UCONFIG_NO_COLLATION */
487
488 /* eof */
OLDNEW
« no previous file with comments | « source/i18n/chnsecal.cpp ('k') | source/i18n/coll.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698