OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 1996-2010, International Business Machines Corporation and * |
| 4 * others. All Rights Reserved. * |
| 5 ******************************************************************************* |
| 6 */ |
| 7 |
| 8 /* |
| 9 * File coleitr.cpp |
| 10 * |
| 11 * |
| 12 * |
| 13 * Created by: Helena Shih |
| 14 * |
| 15 * Modification History: |
| 16 * |
| 17 * Date Name Description |
| 18 * |
| 19 * 6/23/97 helena Adding comments to make code more readable. |
| 20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java |
| 21 * 12/10/99 aliu Ported Thai collation support from Java. |
| 22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h) |
| 23 * 02/19/01 swquek Removed CollationElementsIterator() since it is |
| 24 * private constructor and no calls are made to it |
| 25 */ |
| 26 |
| 27 #include "unicode/utypes.h" |
| 28 |
| 29 #if !UCONFIG_NO_COLLATION |
| 30 |
| 31 #include "unicode/coleitr.h" |
| 32 #include "unicode/ustring.h" |
| 33 #include "ucol_imp.h" |
| 34 #include "cmemory.h" |
| 35 |
| 36 |
| 37 /* Constants --------------------------------------------------------------- */ |
| 38 |
| 39 U_NAMESPACE_BEGIN |
| 40 |
| 41 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator) |
| 42 |
| 43 /* CollationElementIterator public constructor/destructor ------------------ */ |
| 44 |
| 45 CollationElementIterator::CollationElementIterator( |
| 46 const CollationElementIterator& other) |
| 47 : UObject(other), isDataOwned_(TRUE) |
| 48 { |
| 49 UErrorCode status = U_ZERO_ERROR; |
| 50 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, |
| 51 &status); |
| 52 |
| 53 *this = other; |
| 54 } |
| 55 |
| 56 CollationElementIterator::~CollationElementIterator() |
| 57 { |
| 58 if (isDataOwned_) { |
| 59 ucol_closeElements(m_data_); |
| 60 } |
| 61 } |
| 62 |
| 63 /* CollationElementIterator public methods --------------------------------- */ |
| 64 |
| 65 int32_t CollationElementIterator::getOffset() const |
| 66 { |
| 67 return ucol_getOffset(m_data_); |
| 68 } |
| 69 |
| 70 /** |
| 71 * Get the ordering priority of the next character in the string. |
| 72 * @return the next character's ordering. Returns NULLORDER if an error has |
| 73 * occured or if the end of string has been reached |
| 74 */ |
| 75 int32_t CollationElementIterator::next(UErrorCode& status) |
| 76 { |
| 77 return ucol_next(m_data_, &status); |
| 78 } |
| 79 |
| 80 UBool CollationElementIterator::operator!=( |
| 81 const CollationElementIterator& other) const |
| 82 { |
| 83 return !(*this == other); |
| 84 } |
| 85 |
| 86 UBool CollationElementIterator::operator==( |
| 87 const CollationElementIterator& that) const |
| 88 { |
| 89 if (this == &that || m_data_ == that.m_data_) { |
| 90 return TRUE; |
| 91 } |
| 92 |
| 93 // option comparison |
| 94 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll) |
| 95 { |
| 96 return FALSE; |
| 97 } |
| 98 |
| 99 // the constructor and setText always sets a length |
| 100 // and we only compare the string not the contents of the normalization |
| 101 // buffer |
| 102 int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.
string); |
| 103 int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iter
atordata_.string); |
| 104 |
| 105 if (thislength != thatlength) { |
| 106 return FALSE; |
| 107 } |
| 108 |
| 109 if (uprv_memcmp(m_data_->iteratordata_.string, |
| 110 that.m_data_->iteratordata_.string, |
| 111 thislength * U_SIZEOF_UCHAR) != 0) { |
| 112 return FALSE; |
| 113 } |
| 114 if (getOffset() != that.getOffset()) { |
| 115 return FALSE; |
| 116 } |
| 117 |
| 118 // checking normalization buffer |
| 119 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { |
| 120 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) { |
| 121 return FALSE; |
| 122 } |
| 123 // both are in the normalization buffer |
| 124 if (m_data_->iteratordata_.pos |
| 125 - m_data_->iteratordata_.writableBuffer.getBuffer() |
| 126 != that.m_data_->iteratordata_.pos |
| 127 - that.m_data_->iteratordata_.writableBuffer.getBuffer()) { |
| 128 // not in the same position in the normalization buffer |
| 129 return FALSE; |
| 130 } |
| 131 } |
| 132 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { |
| 133 return FALSE; |
| 134 } |
| 135 // checking ce position |
| 136 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs) |
| 137 == (that.m_data_->iteratordata_.CEpos |
| 138 - that.m_data_->iteratordata_.CEs); |
| 139 } |
| 140 |
| 141 /** |
| 142 * Get the ordering priority of the previous collation element in the string. |
| 143 * @param status the error code status. |
| 144 * @return the previous element's ordering. Returns NULLORDER if an error has |
| 145 * occured or if the start of string has been reached. |
| 146 */ |
| 147 int32_t CollationElementIterator::previous(UErrorCode& status) |
| 148 { |
| 149 return ucol_previous(m_data_, &status); |
| 150 } |
| 151 |
| 152 /** |
| 153 * Resets the cursor to the beginning of the string. |
| 154 */ |
| 155 void CollationElementIterator::reset() |
| 156 { |
| 157 ucol_reset(m_data_); |
| 158 } |
| 159 |
| 160 void CollationElementIterator::setOffset(int32_t newOffset, |
| 161 UErrorCode& status) |
| 162 { |
| 163 ucol_setOffset(m_data_, newOffset, &status); |
| 164 } |
| 165 |
| 166 /** |
| 167 * Sets the source to the new source string. |
| 168 */ |
| 169 void CollationElementIterator::setText(const UnicodeString& source, |
| 170 UErrorCode& status) |
| 171 { |
| 172 if (U_FAILURE(status)) { |
| 173 return; |
| 174 } |
| 175 |
| 176 int32_t length = source.length(); |
| 177 UChar *string = NULL; |
| 178 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { |
| 179 uprv_free((UChar *)m_data_->iteratordata_.string); |
| 180 } |
| 181 m_data_->isWritable = TRUE; |
| 182 if (length > 0) { |
| 183 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); |
| 184 /* test for NULL */ |
| 185 if (string == NULL) { |
| 186 status = U_MEMORY_ALLOCATION_ERROR; |
| 187 return; |
| 188 } |
| 189 u_memcpy(string, source.getBuffer(), length); |
| 190 } |
| 191 else { |
| 192 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); |
| 193 /* test for NULL */ |
| 194 if (string == NULL) { |
| 195 status = U_MEMORY_ALLOCATION_ERROR; |
| 196 return; |
| 197 } |
| 198 *string = 0; |
| 199 } |
| 200 /* Free offsetBuffer before initializing it. */ |
| 201 ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); |
| 202 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, |
| 203 &m_data_->iteratordata_, &status); |
| 204 |
| 205 m_data_->reset_ = TRUE; |
| 206 } |
| 207 |
| 208 // Sets the source to the new character iterator. |
| 209 void CollationElementIterator::setText(CharacterIterator& source, |
| 210 UErrorCode& status) |
| 211 { |
| 212 if (U_FAILURE(status)) |
| 213 return; |
| 214 |
| 215 int32_t length = source.getLength(); |
| 216 UChar *buffer = NULL; |
| 217 |
| 218 if (length == 0) { |
| 219 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); |
| 220 /* test for NULL */ |
| 221 if (buffer == NULL) { |
| 222 status = U_MEMORY_ALLOCATION_ERROR; |
| 223 return; |
| 224 } |
| 225 *buffer = 0; |
| 226 } |
| 227 else { |
| 228 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); |
| 229 /* test for NULL */ |
| 230 if (buffer == NULL) { |
| 231 status = U_MEMORY_ALLOCATION_ERROR; |
| 232 return; |
| 233 } |
| 234 /* |
| 235 Using this constructor will prevent buffer from being removed when |
| 236 string gets removed |
| 237 */ |
| 238 UnicodeString string; |
| 239 source.getText(string); |
| 240 u_memcpy(buffer, string.getBuffer(), length); |
| 241 } |
| 242 |
| 243 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { |
| 244 uprv_free((UChar *)m_data_->iteratordata_.string); |
| 245 } |
| 246 m_data_->isWritable = TRUE; |
| 247 /* Free offsetBuffer before initializing it. */ |
| 248 ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); |
| 249 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, |
| 250 &m_data_->iteratordata_, &status); |
| 251 m_data_->reset_ = TRUE; |
| 252 } |
| 253 |
| 254 int32_t CollationElementIterator::strengthOrder(int32_t order) const |
| 255 { |
| 256 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll); |
| 257 // Mask off the unwanted differences. |
| 258 if (s == UCOL_PRIMARY) { |
| 259 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY; |
| 260 } |
| 261 else if (s == UCOL_SECONDARY) { |
| 262 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY; |
| 263 } |
| 264 |
| 265 return order; |
| 266 } |
| 267 |
| 268 /* CollationElementIterator private constructors/destructors --------------- */ |
| 269 |
| 270 /** |
| 271 * This is the "real" constructor for this class; it constructs an iterator |
| 272 * over the source text using the specified collator |
| 273 */ |
| 274 CollationElementIterator::CollationElementIterator( |
| 275 const UnicodeString& sourceText, |
| 276 const RuleBasedCollator* order, |
| 277 UErrorCode& status) |
| 278 : isDataOwned_(TRUE) |
| 279 { |
| 280 if (U_FAILURE(status)) { |
| 281 return; |
| 282 } |
| 283 |
| 284 int32_t length = sourceText.length(); |
| 285 UChar *string = NULL; |
| 286 |
| 287 if (length > 0) { |
| 288 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); |
| 289 /* test for NULL */ |
| 290 if (string == NULL) { |
| 291 status = U_MEMORY_ALLOCATION_ERROR; |
| 292 return; |
| 293 } |
| 294 /* |
| 295 Using this constructor will prevent buffer from being removed when |
| 296 string gets removed |
| 297 */ |
| 298 u_memcpy(string, sourceText.getBuffer(), length); |
| 299 } |
| 300 else { |
| 301 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); |
| 302 /* test for NULL */ |
| 303 if (string == NULL) { |
| 304 status = U_MEMORY_ALLOCATION_ERROR; |
| 305 return; |
| 306 } |
| 307 *string = 0; |
| 308 } |
| 309 m_data_ = ucol_openElements(order->ucollator, string, length, &status); |
| 310 |
| 311 /* Test for buffer overflows */ |
| 312 if (U_FAILURE(status)) { |
| 313 return; |
| 314 } |
| 315 m_data_->isWritable = TRUE; |
| 316 } |
| 317 |
| 318 /** |
| 319 * This is the "real" constructor for this class; it constructs an iterator over |
| 320 * the source text using the specified collator |
| 321 */ |
| 322 CollationElementIterator::CollationElementIterator( |
| 323 const CharacterIterator& sourceText, |
| 324 const RuleBasedCollator* order, |
| 325 UErrorCode& status) |
| 326 : isDataOwned_(TRUE) |
| 327 { |
| 328 if (U_FAILURE(status)) |
| 329 return; |
| 330 |
| 331 // **** should I just drop this test? **** |
| 332 /* |
| 333 if ( sourceText.endIndex() != 0 ) |
| 334 { |
| 335 // A CollationElementIterator is really a two-layered beast. |
| 336 // Internally it uses a Normalizer to munge the source text into a form |
| 337 // where all "composed" Unicode characters (such as \u00FC) are split in
to a |
| 338 // normal character and a combining accent character. |
| 339 // Afterward, CollationElementIterator does its own processing to handle |
| 340 // expanding and contracting collation sequences, ignorables, and so on. |
| 341 |
| 342 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL |
| 343 ? Normalizer::NO_OP : order->getDecomposition(); |
| 344 |
| 345 text = new Normalizer(sourceText, decomp); |
| 346 if (text == NULL) |
| 347 status = U_MEMORY_ALLOCATION_ERROR; |
| 348 } |
| 349 */ |
| 350 int32_t length = sourceText.getLength(); |
| 351 UChar *buffer; |
| 352 if (length > 0) { |
| 353 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); |
| 354 /* test for NULL */ |
| 355 if (buffer == NULL) { |
| 356 status = U_MEMORY_ALLOCATION_ERROR; |
| 357 return; |
| 358 } |
| 359 /* |
| 360 Using this constructor will prevent buffer from being removed when |
| 361 string gets removed |
| 362 */ |
| 363 UnicodeString string(buffer, length, length); |
| 364 ((CharacterIterator &)sourceText).getText(string); |
| 365 const UChar *temp = string.getBuffer(); |
| 366 u_memcpy(buffer, temp, length); |
| 367 } |
| 368 else { |
| 369 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); |
| 370 /* test for NULL */ |
| 371 if (buffer == NULL) { |
| 372 status = U_MEMORY_ALLOCATION_ERROR; |
| 373 return; |
| 374 } |
| 375 *buffer = 0; |
| 376 } |
| 377 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status); |
| 378 |
| 379 /* Test for buffer overflows */ |
| 380 if (U_FAILURE(status)) { |
| 381 return; |
| 382 } |
| 383 m_data_->isWritable = TRUE; |
| 384 } |
| 385 |
| 386 /* CollationElementIterator protected methods ----------------------------- */ |
| 387 |
| 388 const CollationElementIterator& CollationElementIterator::operator=( |
| 389 const CollationElementIterator& other) |
| 390 { |
| 391 if (this != &other) |
| 392 { |
| 393 UCollationElements *ucolelem = this->m_data_; |
| 394 UCollationElements *otherucolelem = other.m_data_; |
| 395 collIterate *coliter = &(ucolelem->iteratordata_); |
| 396 collIterate *othercoliter = &(otherucolelem->iteratordata_); |
| 397 int length = 0; |
| 398 |
| 399 // checking only UCOL_ITER_HASLEN is not enough here as we may be in |
| 400 // the normalization buffer |
| 401 length = (int)(othercoliter->endp - othercoliter->string); |
| 402 |
| 403 ucolelem->reset_ = otherucolelem->reset_; |
| 404 ucolelem->isWritable = TRUE; |
| 405 |
| 406 /* create a duplicate of string */ |
| 407 if (length > 0) { |
| 408 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR); |
| 409 if(coliter->string != NULL) { |
| 410 uprv_memcpy((UChar *)coliter->string, othercoliter->string, |
| 411 length * U_SIZEOF_UCHAR); |
| 412 } else { // Error: couldn't allocate memory. No copying should be do
ne |
| 413 length = 0; |
| 414 } |
| 415 } |
| 416 else { |
| 417 coliter->string = NULL; |
| 418 } |
| 419 |
| 420 /* start and end of string */ |
| 421 coliter->endp = coliter->string + length; |
| 422 |
| 423 /* handle writable buffer here */ |
| 424 |
| 425 if (othercoliter->flags & UCOL_ITER_INNORMBUF) { |
| 426 coliter->writableBuffer = othercoliter->writableBuffer; |
| 427 coliter->writableBuffer.getTerminatedBuffer(); |
| 428 } |
| 429 |
| 430 /* current position */ |
| 431 if (othercoliter->pos >= othercoliter->string && |
| 432 othercoliter->pos <= othercoliter->endp) |
| 433 { |
| 434 coliter->pos = coliter->string + |
| 435 (othercoliter->pos - othercoliter->string); |
| 436 } |
| 437 else { |
| 438 coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + |
| 439 (othercoliter->pos - othercoliter->writableBuffer.getBuffer()); |
| 440 } |
| 441 |
| 442 /* CE buffer */ |
| 443 int32_t CEsize; |
| 444 if (coliter->extendCEs) { |
| 445 uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL
_EXPAND_CE_BUFFER_SIZE); |
| 446 CEsize = sizeof(othercoliter->extendCEs); |
| 447 if (CEsize > 0) { |
| 448 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize); |
| 449 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize)
; |
| 450 } |
| 451 coliter->toReturn = coliter->extendCEs + |
| 452 (othercoliter->toReturn - othercoliter->extendCEs); |
| 453 coliter->CEpos = coliter->extendCEs + CEsize; |
| 454 } else { |
| 455 CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs); |
| 456 if (CEsize > 0) { |
| 457 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize); |
| 458 } |
| 459 coliter->toReturn = coliter->CEs + |
| 460 (othercoliter->toReturn - othercoliter->CEs); |
| 461 coliter->CEpos = coliter->CEs + CEsize; |
| 462 } |
| 463 |
| 464 if (othercoliter->fcdPosition != NULL) { |
| 465 coliter->fcdPosition = coliter->string + |
| 466 (othercoliter->fcdPosition |
| 467 - othercoliter->string); |
| 468 } |
| 469 else { |
| 470 coliter->fcdPosition = NULL; |
| 471 } |
| 472 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/; |
| 473 coliter->origFlags = othercoliter->origFlags; |
| 474 coliter->coll = othercoliter->coll; |
| 475 this->isDataOwned_ = TRUE; |
| 476 } |
| 477 |
| 478 return *this; |
| 479 } |
| 480 |
| 481 U_NAMESPACE_END |
| 482 |
| 483 #endif /* #if !UCONFIG_NO_COLLATION */ |
| 484 |
| 485 /* eof */ |
OLD | NEW |