| Index: icu46/source/i18n/coleitr.cpp
|
| ===================================================================
|
| --- icu46/source/i18n/coleitr.cpp (revision 0)
|
| +++ icu46/source/i18n/coleitr.cpp (revision 0)
|
| @@ -0,0 +1,485 @@
|
| +/*
|
| +*******************************************************************************
|
| +* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
| +* others. All Rights Reserved. *
|
| +*******************************************************************************
|
| +*/
|
| +
|
| +/*
|
| +* File coleitr.cpp
|
| +*
|
| +*
|
| +*
|
| +* Created by: Helena Shih
|
| +*
|
| +* Modification History:
|
| +*
|
| +* Date Name Description
|
| +*
|
| +* 6/23/97 helena Adding comments to make code more readable.
|
| +* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
|
| +* 12/10/99 aliu Ported Thai collation support from Java.
|
| +* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
|
| +* 02/19/01 swquek Removed CollationElementsIterator() since it is
|
| +* private constructor and no calls are made to it
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_COLLATION
|
| +
|
| +#include "unicode/coleitr.h"
|
| +#include "unicode/ustring.h"
|
| +#include "ucol_imp.h"
|
| +#include "cmemory.h"
|
| +
|
| +
|
| +/* Constants --------------------------------------------------------------- */
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
|
| +
|
| +/* CollationElementIterator public constructor/destructor ------------------ */
|
| +
|
| +CollationElementIterator::CollationElementIterator(
|
| +        const CollationElementIterator& other)
|
| +    : UObject(other), isDataOwned_(TRUE)
|
| +{
|
| +    UErrorCode status = U_ZERO_ERROR;  // cannot be reported from a copy constructor
|
| +    m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, &status);
|
| +    // Failed open: skip the copy (it dereferences m_data_) and close nothing later.
|
| +    if (m_data_ == NULL) { isDataOwned_ = FALSE; return; }
|
| +    *this = other;  // copies other's iteration state into the new, empty iterator
|
| +}
|
| +
|
| +// Closes the wrapped C collation-element iterator, but only when
|
| +// isDataOwned_ is set; otherwise m_data_ is left alone.
|
| +CollationElementIterator::~CollationElementIterator() {
|
| +    if (!isDataOwned_) { return; }
|
| +    ucol_closeElements(m_data_);
|
| +}
|
| +
|
| +/* CollationElementIterator public methods --------------------------------- */
|
| +
|
| +// Returns whatever ucol_getOffset() reports for the wrapped iterator.
|
| +int32_t CollationElementIterator::getOffset() const {
|
| +    return ucol_getOffset(m_data_);
|
| +}
|
| +
|
| +/**
|
| +* Gets the ordering priority of the next character in the string.
|
| +* @param status the error code status, forwarded to ucol_next().
|
| +* @return the next character's ordering; NULLORDER on error or at end of string.
|
| +*/
|
| +int32_t CollationElementIterator::next(UErrorCode& status) {
|
| +    const int32_t order = ucol_next(m_data_, &status);
|
| +    return order;
|
| +}
|
| +
|
| +// Inequality is the exact negation of operator==.
|
| +UBool CollationElementIterator::operator!=(
|
| +        const CollationElementIterator& other) const {
|
| +    return (*this == other) ? FALSE : TRUE;
|
| +}
|
| +
|
| +// Two iterators are equal when they are the same object or share m_data_, or
|
| +// when collator, source text, offset, buffer position and CE position all match.
|
| +UBool CollationElementIterator::operator==(
|
| +        const CollationElementIterator& that) const
|
| +{
|
| +    if (this == &that || m_data_ == that.m_data_) {
|
| +        return TRUE;
|
| +    }
|
| +
|
| +    const collIterate &lhs = m_data_->iteratordata_;
|
| +    const collIterate &rhs = that.m_data_->iteratordata_;
|
| +
|
| +    // option comparison: both sides must use the same collator object
|
| +    if (lhs.coll != rhs.coll) {
|
| +        return FALSE;
|
| +    }
|
| +
|
| +    // the constructor and setText always set a length,
|
| +    // and we only compare the string, not the contents of the normalization
|
| +    // buffer
|
| +    const int lhsLength = (int)(lhs.endp - lhs.string);
|
| +    const int rhsLength = (int)(rhs.endp - rhs.string);
|
| +    if (lhsLength != rhsLength) {
|
| +        return FALSE;
|
| +    }
|
| +
|
| +    // same length, so compare the UChar contents and then the offsets
|
| +    const UBool sameText = uprv_memcmp(lhs.string, rhs.string,
|
| +                                       lhsLength * U_SIZEOF_UCHAR) == 0;
|
| +    if (!sameText || getOffset() != that.getOffset()) {
|
| +        return FALSE;
|
| +    }
|
| +
|
| +    // checking normalization buffer: UCOL_ITER_HASLEN has to agree on both sides
|
| +    const UBool lhsHasLen = (lhs.flags & UCOL_ITER_HASLEN) != 0;
|
| +    const UBool rhsHasLen = (rhs.flags & UCOL_ITER_HASLEN) != 0;
|
| +    if (lhsHasLen != rhsHasLen) {
|
| +        return FALSE;
|
| +    }
|
| +    if (!lhsHasLen) {
|
| +        // neither has the flag: compare positions relative to writableBuffer
|
| +        const UChar *lhsBase = lhs.writableBuffer.getBuffer();
|
| +        const UChar *rhsBase = rhs.writableBuffer.getBuffer();
|
| +        if (lhs.pos - lhsBase != rhs.pos - rhsBase) {
|
| +            // not in the same position in the normalization buffer
|
| +            return FALSE;
|
| +        }
|
| +    }
|
| +
|
| +    // checking ce position
|
| +    const UBool sameCEPosition =
|
| +        (lhs.CEpos - lhs.CEs) == (rhs.CEpos - rhs.CEs);
|
| +    return sameCEPosition;
|
| +}
|
| +
|
| +/**
|
| +* Gets the ordering priority of the previous collation element in the string.
|
| +* @param status the error code status, forwarded to ucol_previous().
|
| +* @return the previous element's ordering; NULLORDER if an error has occurred
|
| +*         or if the start of the string has been reached.
|
| +*/
|
| +int32_t CollationElementIterator::previous(UErrorCode& status) {
|
| +    const int32_t order = ucol_previous(m_data_, &status);
|
| +    return order;
|
| +}
|
| +
|
| +/**
|
| +* Resets the cursor to the beginning of the string by calling ucol_reset().
|
| +*/
|
| +void CollationElementIterator::reset() {
|
| +    UCollationElements *elements = m_data_;
|
| +    ucol_reset(elements);
|
| +}
|
| +
|
| +// Forwards newOffset and status to ucol_setOffset() on the wrapped iterator.
|
| +void CollationElementIterator::setOffset(
|
| +        int32_t newOffset, UErrorCode& status) {
|
| +    ucol_setOffset(m_data_, newOffset, &status);
|
| +}
|
| +
|
| +/**
|
| +* Sets the source to the new source string.
|
| +* The text is copied, so source does not have to outlive this call.
|
| +* @param source the new text to iterate over.
|
| +* @param status the error code status. Nothing happens if it is already a failure.
|
| +*        Set to U_MEMORY_ALLOCATION_ERROR, keeping the old text, if the copy fails.
|
| +*/
|
| +void CollationElementIterator::setText(const UnicodeString& source,
|
| +                                       UErrorCode& status)
|
| +{
|
| +    if (U_FAILURE(status)) {
|
| +        return;
|
| +    }
|
| +
|
| +    const int32_t length = source.length();
|
| +    // Allocate the copy before touching the current text: freeing first and
|
| +    // then failing here would leave iteratordata_.string dangling.
|
| +    // An empty source still gets a one-unit buffer holding a single 0.
|
| +    UChar *string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * (length > 0 ? length : 1));
|
| +    /* test for NULL */
|
| +    if (string == NULL) {
|
| +        status = U_MEMORY_ALLOCATION_ERROR;
|
| +        return;
|
| +    }
|
| +    if (length > 0) {
|
| +        u_memcpy(string, source.getBuffer(), length);
|
| +    }
|
| +    else {
|
| +        *string = 0;
|
| +    }
|
| +    // The copy exists now, so text owned from an earlier call can be released.
|
| +    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
|
| +        uprv_free((UChar *)m_data_->iteratordata_.string);
|
| +    }
|
| +    m_data_->isWritable = TRUE;
|
| +    /* Free offsetBuffer before initializing it. */
|
| +    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
|
| +    uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
|
| +                          &m_data_->iteratordata_, &status);
|
| +    m_data_->reset_ = TRUE;
|
| +}
|
| +
|
| +// Sets the source to the text of the given character iterator.
|
| +// The text is copied into a newly allocated buffer that is handed to
|
| +// uprv_init_collIterate(), and isWritable is set; status becomes
|
| +// U_MEMORY_ALLOCATION_ERROR when the buffer cannot be allocated.
|
| +void CollationElementIterator::setText(CharacterIterator& source,
|
| +                                       UErrorCode& status)
|
| +{
|
| +    if (U_FAILURE(status)) {
|
| +        return;
|
| +    }
|
| +
|
| +    const int32_t length = source.getLength();
|
| +
|
| +    // An empty source still gets a one-unit buffer holding a single 0.
|
| +    UChar *buffer =
|
| +        (UChar *)uprv_malloc(U_SIZEOF_UCHAR * (length == 0 ? 1 : length));
|
| +    /* test for NULL */
|
| +    if (buffer == NULL) {
|
| +        status = U_MEMORY_ALLOCATION_ERROR;
|
| +        return;
|
| +    }
|
| +
|
| +    if (length == 0) {
|
| +        *buffer = 0;
|
| +    }
|
| +    else {
|
| +        // Pull the iterator's text into a temporary UnicodeString, then copy
|
| +        // it into the buffer that was allocated above.
|
| +        UnicodeString string;
|
| +        source.getText(string);
|
| +        u_memcpy(buffer, string.getBuffer(), length);
|
| +    }
|
| +
|
| +    collIterate *ci = &(m_data_->iteratordata_);
|
| +    // Release the previous text only when isWritable is set and it is non-NULL.
|
| +    if (m_data_->isWritable && ci->string != NULL) {
|
| +        uprv_free((UChar *)ci->string);
|
| +    }
|
| +    m_data_->isWritable = TRUE;
|
| +
|
| +    /* Free offsetBuffer before initializing it. */
|
| +    ucol_freeOffsetBuffer(ci);
|
| +    uprv_init_collIterate(ci->coll, buffer, length, ci, &status);
|
| +    m_data_->reset_ = TRUE;
|
| +}
|
| +
|
| +// Masks off the differences in order that are unwanted at the collator's
|
| +// strength; strengths other than primary and secondary leave it unchanged.
|
| +int32_t CollationElementIterator::strengthOrder(int32_t order) const
|
| +{
|
| +    switch (ucol_getStrength(m_data_->iteratordata_.coll)) {
|
| +    case UCOL_PRIMARY:
|
| +        return order & RuleBasedCollator::PRIMARYDIFFERENCEONLY;
|
| +    case UCOL_SECONDARY:
|
| +        return order & RuleBasedCollator::SECONDARYDIFFERENCEONLY;
|
| +    default:
|
| +        return order;
|
| +    }
|
| +}
|
| +
|
| +/* CollationElementIterator private constructors/destructors --------------- */
|
| +
|
| +/**
|
| +* This is the "real" constructor for this class; it constructs an iterator
|
| +* over the source text using the specified collator.
|
| +* The text is copied; on allocation failure status is U_MEMORY_ALLOCATION_ERROR.
|
| +*/
|
| +CollationElementIterator::CollationElementIterator(
|
| +        const UnicodeString& sourceText,
|
| +        const RuleBasedCollator* order,
|
| +        UErrorCode& status)
|
| +    : isDataOwned_(FALSE)
|
| +{
|
| +    // Nothing exists for the destructor to close until ucol_openElements()
|
| +    // returns an iterator, so early returns leave m_data_ NULL and unowned.
|
| +    m_data_ = NULL;
|
| +    if (U_FAILURE(status)) {
|
| +        return;
|
| +    }
|
| +
|
| +    const int32_t length = sourceText.length();
|
| +    // An empty text still gets a one-unit buffer holding a single 0.
|
| +    UChar *string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * (length > 0 ? length : 1));
|
| +    /* test for NULL */
|
| +    if (string == NULL) {
|
| +        status = U_MEMORY_ALLOCATION_ERROR;
|
| +        return;
|
| +    }
|
| +    if (length > 0) {
|
| +        u_memcpy(string, sourceText.getBuffer(), length);
|
| +    }
|
| +    else {
|
| +        *string = 0;
|
| +    }
|
| +
|
| +    m_data_ = ucol_openElements(order->ucollator, string, length, &status);
|
| +    if (m_data_ == NULL) {
|
| +        // No iterator was created to hold the copy, so release it here.
|
| +        uprv_free(string);
|
| +        return;
|
| +    }
|
| +    isDataOwned_ = TRUE;
|
| +
|
| +    /* Test for buffer overflows */
|
| +    if (U_FAILURE(status)) {
|
| +        return;
|
| +    }
|
| +    m_data_->isWritable = TRUE;
|
| +}
|
| +
|
| +/**
|
| +* This is the "real" constructor for this class; it constructs an iterator over
|
| +* the source text using the specified collator.
|
| +* The text is copied; on allocation failure status is U_MEMORY_ALLOCATION_ERROR.
|
| +*/
|
| +CollationElementIterator::CollationElementIterator(
|
| +        const CharacterIterator& sourceText,
|
| +        const RuleBasedCollator* order,
|
| +        UErrorCode& status)
|
| +    : isDataOwned_(FALSE)
|
| +{
|
| +    // Nothing exists for the destructor to close until ucol_openElements()
|
| +    // returns an iterator, so early returns leave m_data_ NULL and unowned.
|
| +    m_data_ = NULL;
|
| +    if (U_FAILURE(status))
|
| +        return;
|
| +    // **** should I just drop this test? ****
|
| +    /*
|
| +    if ( sourceText.endIndex() != 0 )
|
| +    {
|
| +        // A CollationElementIterator is really a two-layered beast.
|
| +        // Internally it uses a Normalizer to munge the source text into a form
|
| +        // where all "composed" Unicode characters (such as \u00FC) are split into a
|
| +        // normal character and a combining accent character.
|
| +        // Afterward, CollationElementIterator does its own processing to handle
|
| +        // expanding and contracting collation sequences, ignorables, and so on.
|
| +
|
| +        Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
|
| +            ? Normalizer::NO_OP : order->getDecomposition();
|
| +
|
| +        text = new Normalizer(sourceText, decomp);
|
| +        if (text == NULL)
|
| +            status = U_MEMORY_ALLOCATION_ERROR;
|
| +    }
|
| +    */
|
| +    const int32_t length = sourceText.getLength();
|
| +    // An empty text still gets a one-unit buffer holding a single 0.
|
| +    UChar *buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * (length > 0 ? length : 1));
|
| +    /* test for NULL */
|
| +    if (buffer == NULL) {
|
| +        status = U_MEMORY_ALLOCATION_ERROR;
|
| +        return;
|
| +    }
|
| +    if (length > 0) {
|
| +        // Using this constructor will prevent buffer from being removed when
|
| +        // string gets removed.
|
| +        UnicodeString string(buffer, length, length);
|
| +        ((CharacterIterator &)sourceText).getText(string);
|
| +        const UChar *temp = string.getBuffer();
|
| +        u_memcpy(buffer, temp, length);
|
| +    }
|
| +    else {
|
| +        *buffer = 0;
|
| +    }
|
| +    m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
|
| +    if (m_data_ == NULL) {
|
| +        // No iterator was created to hold the copy, so release it here.
|
| +        uprv_free(buffer);
|
| +        return;
|
| +    }
|
| +    isDataOwned_ = TRUE;
|
| +    /* Test for buffer overflows */
|
| +    if (U_FAILURE(status)) {
|
| +        return;
|
| +    }
|
| +    m_data_->isWritable = TRUE;
|
| +}
|
| +
|
| +/* CollationElementIterator protected methods ----------------------------- */
|
| +
|
| +const CollationElementIterator& CollationElementIterator::operator=(
|
| + const CollationElementIterator& other)
|
| +{ // Copies other's source text, position, CE buffer and flags into this->m_data_; returns *this.
|
| + if (this != &other) // self-assignment copies nothing
|
| + {
|
| + UCollationElements *ucolelem = this->m_data_;
|
| + UCollationElements *otherucolelem = other.m_data_;
|
| + collIterate *coliter = &(ucolelem->iteratordata_);
|
| + collIterate *othercoliter = &(otherucolelem->iteratordata_);
|
| + int length = 0;
|
| +
|
| + // checking only UCOL_ITER_HASLEN is not enough here as we may be in
|
| + // the normalization buffer
|
| + length = (int)(othercoliter->endp - othercoliter->string); // length of other's source text in UChars
|
| +
|
| + ucolelem->reset_ = otherucolelem->reset_;
|
| + ucolelem->isWritable = TRUE; // set unconditionally, even when the allocation below fails or length is 0
|
| +
|
| + /* create a duplicate of other's source string */
|
| + if (length > 0) {
|
| + coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR); // NOTE(review): the previous coliter->string is not freed in this function - confirm it cannot leak
|
| + if(coliter->string != NULL) {
|
| + uprv_memcpy((UChar *)coliter->string, othercoliter->string,
|
| + length * U_SIZEOF_UCHAR);
|
| + } else { // Error: couldn't allocate memory. No copying should be done
|
| + length = 0;
|
| + }
|
| + }
|
| + else {
|
| + coliter->string = NULL;
|
| + }
|
| +
|
| + /* start and end of string */
|
| + coliter->endp = coliter->string + length;
|
| +
|
| + /* handle writable buffer here: copied only when other is flagged UCOL_ITER_INNORMBUF */
|
| +
|
| + if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
|
| + coliter->writableBuffer = othercoliter->writableBuffer;
|
| + coliter->writableBuffer.getTerminatedBuffer(); // return value unused; presumably called to NUL-terminate the copy - TODO confirm
|
| + }
|
| +
|
| + /* current position */
|
| + if (othercoliter->pos >= othercoliter->string &&
|
| + othercoliter->pos <= othercoliter->endp)
|
| + { // other's pos lies inside its source string: use the same offset in our copy
|
| + coliter->pos = coliter->string +
|
| + (othercoliter->pos - othercoliter->string);
|
| + }
|
| + else { // otherwise pos is taken as an offset into other's writableBuffer
|
| + coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
|
| + (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
|
| + }
|
| +
|
| + /* CE buffer */
|
| + int32_t CEsize;
|
| + if (coliter->extendCEs) { // NOTE(review): tests this iterator's extendCEs, not other's - confirm intent
|
| + uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
|
| + CEsize = sizeof(othercoliter->extendCEs); // NOTE(review): sizeof of the pointer member, not the length of the buffer it points to
|
| + if (CEsize > 0) {
|
| + othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize); // NOTE(review): overwrites other's pointer with an unchecked allocation - looks like it should target coliter
|
| + uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize); // copies from the block allocated on the previous line
|
| + }
|
| + coliter->toReturn = coliter->extendCEs +
|
| + (othercoliter->toReturn - othercoliter->extendCEs);
|
| + coliter->CEpos = coliter->extendCEs + CEsize; // NOTE(review): CEsize is a byte count here but is added as an element count
|
| + } else {
|
| + CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs); // pointer difference, i.e. a count of CE elements
|
| + if (CEsize > 0) {
|
| + uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize); // NOTE(review): element count passed as the byte count - confirm whether it should be scaled by the element size
|
| + }
|
| + coliter->toReturn = coliter->CEs +
|
| + (othercoliter->toReturn - othercoliter->CEs);
|
| + coliter->CEpos = coliter->CEs + CEsize;
|
| + }
|
| +
|
| + if (othercoliter->fcdPosition != NULL) { // keep the same fcdPosition offset, relative to our copy of the string
|
| + coliter->fcdPosition = coliter->string +
|
| + (othercoliter->fcdPosition
|
| + - othercoliter->string);
|
| + }
|
| + else {
|
| + coliter->fcdPosition = NULL;
|
| + }
|
| + coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
|
| + coliter->origFlags = othercoliter->origFlags;
|
| + coliter->coll = othercoliter->coll; // the collator pointer itself is copied, not the collator
|
| + this->isDataOwned_ = TRUE;
|
| + }
|
| +
|
| + return *this;
|
| +}
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif /* #if !UCONFIG_NO_COLLATION */
|
| +
|
| +/* eof */
|
|
|
| Property changes on: icu46/source/i18n/coleitr.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|