| Index: source/i18n/coleitr.cpp
|
| diff --git a/source/i18n/coleitr.cpp b/source/i18n/coleitr.cpp
|
| index 7db3e5f752733a0df6dd47ec890e7a14261af6a9..40dfd11c28e6e00fe15f9554d30d4305a9f2f450 100644
|
| --- a/source/i18n/coleitr.cpp
|
| +++ b/source/i18n/coleitr.cpp
|
| @@ -1,15 +1,13 @@
|
| /*
|
| *******************************************************************************
|
| -* Copyright (C) 1996-2011, International Business Machines Corporation and *
|
| -* others. All Rights Reserved. *
|
| +* Copyright (C) 1996-2014, International Business Machines Corporation and
|
| +* others. All Rights Reserved.
|
| *******************************************************************************
|
| */
|
|
|
| /*
|
| * File coleitr.cpp
|
| *
|
| -*
|
| -*
|
| * Created by: Helena Shih
|
| *
|
| * Modification History:
|
| @@ -20,8 +18,9 @@
|
| * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
|
| * 12/10/99 aliu Ported Thai collation support from Java.
|
| * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
|
| -* 02/19/01 swquek Removed CollationElementsIterator() since it is
|
| +* 02/19/01 swquek Removed CollationElementIterator() since it is
|
| * private constructor and no calls are made to it
|
| +* 2012-2014 markus Rewritten in C++ again.
|
| */
|
|
|
| #include "unicode/utypes.h"
|
| @@ -29,11 +28,18 @@
|
| #if !UCONFIG_NO_COLLATION
|
|
|
| #include "unicode/coleitr.h"
|
| +#include "unicode/tblcoll.h"
|
| #include "unicode/ustring.h"
|
| -#include "ucol_imp.h"
|
| -#include "uassert.h"
|
| #include "cmemory.h"
|
| -
|
| +#include "collation.h"
|
| +#include "collationdata.h"
|
| +#include "collationiterator.h"
|
| +#include "collationsets.h"
|
| +#include "collationtailoring.h"
|
| +#include "uassert.h"
|
| +#include "uhash.h"
|
| +#include "utf16collationiterator.h"
|
| +#include "uvectr32.h"
|
|
|
| /* Constants --------------------------------------------------------------- */
|
|
|
| @@ -45,27 +51,46 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
|
|
|
| CollationElementIterator::CollationElementIterator(
|
| const CollationElementIterator& other)
|
| - : UObject(other), isDataOwned_(TRUE)
|
| -{
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
|
| - &status);
|
| -
|
| + : UObject(other), iter_(NULL), rbc_(NULL), otherHalf_(0), dir_(0), offsets_(NULL) {
|
| *this = other;
|
| }
|
|
|
| CollationElementIterator::~CollationElementIterator()
|
| {
|
| - if (isDataOwned_) {
|
| - ucol_closeElements(m_data_);
|
| - }
|
| + delete iter_;
|
| + delete offsets_;
|
| }
|
|
|
| /* CollationElementIterator public methods --------------------------------- */
|
|
|
| +namespace {
|
| +
|
| +uint32_t getFirstHalf(uint32_t p, uint32_t lower32) {
|
| + return (p & 0xffff0000) | ((lower32 >> 16) & 0xff00) | ((lower32 >> 8) & 0xff);
|
| +}
|
| +uint32_t getSecondHalf(uint32_t p, uint32_t lower32) {
|
| + return (p << 16) | ((lower32 >> 8) & 0xff00) | (lower32 & 0x3f);
|
| +}
|
| +UBool ceNeedsTwoParts(int64_t ce) {
|
| + return (ce & INT64_C(0xffff00ff003f)) != 0;
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| int32_t CollationElementIterator::getOffset() const
|
| {
|
| - return ucol_getOffset(m_data_);
|
| + if (dir_ < 0 && offsets_ != NULL && !offsets_->isEmpty()) {
|
| + // CollationIterator::previousCE() decrements the CEs length
|
| + // while it pops CEs from its internal buffer.
|
| + int32_t i = iter_->getCEsLength();
|
| + if (otherHalf_ != 0) {
|
| + // Return the trailing CE offset while we are in the middle of a 64-bit CE.
|
| + ++i;
|
| + }
|
| + U_ASSERT(i < offsets_->size());
|
| + return offsets_->elementAti(i);
|
| + }
|
| + return iter_->getOffset();
|
| }
|
|
|
| /**
|
| @@ -75,7 +100,38 @@ int32_t CollationElementIterator::getOffset() const
|
| */
|
| int32_t CollationElementIterator::next(UErrorCode& status)
|
| {
|
| - return ucol_next(m_data_, &status);
|
| + if (U_FAILURE(status)) { return NULLORDER; }
|
| + if (dir_ > 1) {
|
| + // Continue forward iteration. Test this first.
|
| + if (otherHalf_ != 0) {
|
| + uint32_t oh = otherHalf_;
|
| + otherHalf_ = 0;
|
| + return oh;
|
| + }
|
| + } else if (dir_ == 1) {
|
| + // next() after setOffset()
|
| + dir_ = 2;
|
| + } else if (dir_ == 0) {
|
| + // The iter_ is already reset to the start of the text.
|
| + dir_ = 2;
|
| + } else /* dir_ < 0 */ {
|
| + // illegal change of direction
|
| + status = U_INVALID_STATE_ERROR;
|
| + return NULLORDER;
|
| + }
|
| + // No need to keep all CEs in the buffer when we iterate.
|
| + iter_->clearCEsIfNoneRemaining();
|
| + int64_t ce = iter_->nextCE(status);
|
| + if (ce == Collation::NO_CE) { return NULLORDER; }
|
| + // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
|
| + uint32_t p = (uint32_t)(ce >> 32);
|
| + uint32_t lower32 = (uint32_t)ce;
|
| + uint32_t firstHalf = getFirstHalf(p, lower32);
|
| + uint32_t secondHalf = getSecondHalf(p, lower32);
|
| + if (secondHalf != 0) {
|
| + otherHalf_ = secondHalf | 0xc0; // continuation CE
|
| + }
|
| + return firstHalf;
|
| }
|
|
|
| UBool CollationElementIterator::operator!=(
|
| @@ -87,56 +143,16 @@ UBool CollationElementIterator::operator!=(
|
| UBool CollationElementIterator::operator==(
|
| const CollationElementIterator& that) const
|
| {
|
| - if (this == &that || m_data_ == that.m_data_) {
|
| + if (this == &that) {
|
| return TRUE;
|
| }
|
|
|
| - // option comparison
|
| - if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
|
| - {
|
| - return FALSE;
|
| - }
|
| -
|
| - // the constructor and setText always sets a length
|
| - // and we only compare the string not the contents of the normalization
|
| - // buffer
|
| - int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
|
| - int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
|
| -
|
| - if (thislength != thatlength) {
|
| - return FALSE;
|
| - }
|
| -
|
| - if (uprv_memcmp(m_data_->iteratordata_.string,
|
| - that.m_data_->iteratordata_.string,
|
| - thislength * U_SIZEOF_UCHAR) != 0) {
|
| - return FALSE;
|
| - }
|
| - if (getOffset() != that.getOffset()) {
|
| - return FALSE;
|
| - }
|
| -
|
| - // checking normalization buffer
|
| - if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
|
| - if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
|
| - return FALSE;
|
| - }
|
| - // both are in the normalization buffer
|
| - if (m_data_->iteratordata_.pos
|
| - - m_data_->iteratordata_.writableBuffer.getBuffer()
|
| - != that.m_data_->iteratordata_.pos
|
| - - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
|
| - // not in the same position in the normalization buffer
|
| - return FALSE;
|
| - }
|
| - }
|
| - else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
|
| - return FALSE;
|
| - }
|
| - // checking ce position
|
| - return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
|
| - == (that.m_data_->iteratordata_.CEpos
|
| - - that.m_data_->iteratordata_.CEs);
|
| + return
|
| + (rbc_ == that.rbc_ || *rbc_ == *that.rbc_) &&
|
| + otherHalf_ == that.otherHalf_ &&
|
| + normalizeDir() == that.normalizeDir() &&
|
| + string_ == that.string_ &&
|
| + *iter_ == *that.iter_;
|
| }
|
|
|
| /**
|
| @@ -147,7 +163,55 @@ UBool CollationElementIterator::operator==(
|
| */
|
| int32_t CollationElementIterator::previous(UErrorCode& status)
|
| {
|
| - return ucol_previous(m_data_, &status);
|
| + if (U_FAILURE(status)) { return NULLORDER; }
|
| + if (dir_ < 0) {
|
| + // Continue backwards iteration. Test this first.
|
| + if (otherHalf_ != 0) {
|
| + uint32_t oh = otherHalf_;
|
| + otherHalf_ = 0;
|
| + return oh;
|
| + }
|
| + } else if (dir_ == 0) {
|
| + iter_->resetToOffset(string_.length());
|
| + dir_ = -1;
|
| + } else if (dir_ == 1) {
|
| + // previous() after setOffset()
|
| + dir_ = -1;
|
| + } else /* dir_ > 1 */ {
|
| + // illegal change of direction
|
| + status = U_INVALID_STATE_ERROR;
|
| + return NULLORDER;
|
| + }
|
| + if (offsets_ == NULL) {
|
| + offsets_ = new UVector32(status);
|
| + if (offsets_ == NULL) {
|
| + status = U_MEMORY_ALLOCATION_ERROR;
|
| + return NULLORDER;
|
| + }
|
| + }
|
| + // If we already have expansion CEs, then we also have offsets.
|
| + // Otherwise remember the trailing offset in case we need to
|
| + // write offsets for an artificial expansion.
|
| + int32_t limitOffset = iter_->getCEsLength() == 0 ? iter_->getOffset() : 0;
|
| + int64_t ce = iter_->previousCE(*offsets_, status);
|
| + if (ce == Collation::NO_CE) { return NULLORDER; }
|
| + // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
|
| + uint32_t p = (uint32_t)(ce >> 32);
|
| + uint32_t lower32 = (uint32_t)ce;
|
| + uint32_t firstHalf = getFirstHalf(p, lower32);
|
| + uint32_t secondHalf = getSecondHalf(p, lower32);
|
| + if (secondHalf != 0) {
|
| + if (offsets_->isEmpty()) {
|
| + // When we convert a single 64-bit CE into two 32-bit CEs,
|
| + // we need to make this artificial expansion behave like a normal expansion.
|
| + // See CollationIterator::previousCE().
|
| + offsets_->addElement(iter_->getOffset(), status);
|
| + offsets_->addElement(limitOffset, status);
|
| + }
|
| + otherHalf_ = firstHalf;
|
| + return secondHalf | 0xc0; // continuation CE
|
| + }
|
| + return firstHalf;
|
| }
|
|
|
| /**
|
| @@ -155,13 +219,49 @@ int32_t CollationElementIterator::previous(UErrorCode& status)
|
| */
|
| void CollationElementIterator::reset()
|
| {
|
| - ucol_reset(m_data_);
|
| + iter_ ->resetToOffset(0);
|
| + otherHalf_ = 0;
|
| + dir_ = 0;
|
| }
|
|
|
| void CollationElementIterator::setOffset(int32_t newOffset,
|
| UErrorCode& status)
|
| {
|
| - ucol_setOffset(m_data_, newOffset, &status);
|
| + if (U_FAILURE(status)) { return; }
|
| + if (0 < newOffset && newOffset < string_.length()) {
|
| + int32_t offset = newOffset;
|
| + do {
|
| + UChar c = string_.charAt(offset);
|
| + if (!rbc_->isUnsafe(c) ||
|
| + (U16_IS_LEAD(c) && !rbc_->isUnsafe(string_.char32At(offset)))) {
|
| + break;
|
| + }
|
| + // Back up to before this unsafe character.
|
| + --offset;
|
| + } while (offset > 0);
|
| + if (offset < newOffset) {
|
| + // We might have backed up more than necessary.
|
| + // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,
|
| + // but for text "chu" setOffset(2) should remain at 2
|
| + // although we initially back up to offset 0.
|
| + // Find the last safe offset no greater than newOffset by iterating forward.
|
| + int32_t lastSafeOffset = offset;
|
| + do {
|
| + iter_->resetToOffset(lastSafeOffset);
|
| + do {
|
| + iter_->nextCE(status);
|
| + if (U_FAILURE(status)) { return; }
|
| + } while ((offset = iter_->getOffset()) == lastSafeOffset);
|
| + if (offset <= newOffset) {
|
| + lastSafeOffset = offset;
|
| + }
|
| + } while (offset < newOffset);
|
| + newOffset = lastSafeOffset;
|
| + }
|
| + }
|
| + iter_->resetToOffset(newOffset);
|
| + otherHalf_ = 0;
|
| + dir_ = 1;
|
| }
|
|
|
| /**
|
| @@ -174,36 +274,23 @@ void CollationElementIterator::setText(const UnicodeString& source,
|
| return;
|
| }
|
|
|
| - int32_t length = source.length();
|
| - UChar *string = NULL;
|
| - if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
|
| - uprv_free((UChar *)m_data_->iteratordata_.string);
|
| + string_ = source;
|
| + const UChar *s = string_.getBuffer();
|
| + CollationIterator *newIter;
|
| + UBool numeric = rbc_->settings->isNumeric();
|
| + if (rbc_->settings->dontCheckFCD()) {
|
| + newIter = new UTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length());
|
| + } else {
|
| + newIter = new FCDUTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length());
|
| }
|
| - m_data_->isWritable = TRUE;
|
| - if (length > 0) {
|
| - string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
|
| - /* test for NULL */
|
| - if (string == NULL) {
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| - u_memcpy(string, source.getBuffer(), length);
|
| - }
|
| - else {
|
| - string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
|
| - /* test for NULL */
|
| - if (string == NULL) {
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| - *string = 0;
|
| + if (newIter == NULL) {
|
| + status = U_MEMORY_ALLOCATION_ERROR;
|
| + return;
|
| }
|
| - /* Free offsetBuffer before initializing it. */
|
| - ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
|
| - uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
|
| - &m_data_->iteratordata_, &status);
|
| -
|
| - m_data_->reset_ = TRUE;
|
| + delete iter_;
|
| + iter_ = newIter;
|
| + otherHalf_ = 0;
|
| + dir_ = 0;
|
| }
|
|
|
| // Sets the source to the new character iterator.
|
| @@ -213,54 +300,19 @@ void CollationElementIterator::setText(CharacterIterator& source,
|
| if (U_FAILURE(status))
|
| return;
|
|
|
| - int32_t length = source.getLength();
|
| - UChar *buffer = NULL;
|
| -
|
| - if (length == 0) {
|
| - buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
|
| - /* test for NULL */
|
| - if (buffer == NULL) {
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| - *buffer = 0;
|
| - }
|
| - else {
|
| - buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
|
| - /* test for NULL */
|
| - if (buffer == NULL) {
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| - /*
|
| - Using this constructor will prevent buffer from being removed when
|
| - string gets removed
|
| - */
|
| - UnicodeString string;
|
| - source.getText(string);
|
| - u_memcpy(buffer, string.getBuffer(), length);
|
| - }
|
| -
|
| - if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
|
| - uprv_free((UChar *)m_data_->iteratordata_.string);
|
| - }
|
| - m_data_->isWritable = TRUE;
|
| - /* Free offsetBuffer before initializing it. */
|
| - ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
|
| - uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
|
| - &m_data_->iteratordata_, &status);
|
| - m_data_->reset_ = TRUE;
|
| + source.getText(string_);
|
| + setText(string_, status);
|
| }
|
|
|
| int32_t CollationElementIterator::strengthOrder(int32_t order) const
|
| {
|
| - UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
|
| + UColAttributeValue s = (UColAttributeValue)rbc_->settings->getStrength();
|
| // Mask off the unwanted differences.
|
| if (s == UCOL_PRIMARY) {
|
| - order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
|
| + order &= 0xffff0000;
|
| }
|
| else if (s == UCOL_SECONDARY) {
|
| - order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
|
| + order &= 0xffffff00;
|
| }
|
|
|
| return order;
|
| @@ -273,47 +325,11 @@ int32_t CollationElementIterator::strengthOrder(int32_t order) const
|
| * over the source text using the specified collator
|
| */
|
| CollationElementIterator::CollationElementIterator(
|
| - const UnicodeString& sourceText,
|
| - const RuleBasedCollator* order,
|
| - UErrorCode& status)
|
| - : isDataOwned_(TRUE)
|
| -{
|
| - if (U_FAILURE(status)) {
|
| - return;
|
| - }
|
| -
|
| - int32_t length = sourceText.length();
|
| - UChar *string = NULL;
|
| -
|
| - if (length > 0) {
|
| - string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
|
| - /* test for NULL */
|
| - if (string == NULL) {
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| - /*
|
| - Using this constructor will prevent buffer from being removed when
|
| - string gets removed
|
| - */
|
| - u_memcpy(string, sourceText.getBuffer(), length);
|
| - }
|
| - else {
|
| - string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
|
| - /* test for NULL */
|
| - if (string == NULL) {
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| - *string = 0;
|
| - }
|
| - m_data_ = ucol_openElements(order->ucollator, string, length, &status);
|
| -
|
| - /* Test for buffer overflows */
|
| - if (U_FAILURE(status)) {
|
| - return;
|
| - }
|
| - m_data_->isWritable = TRUE;
|
| + const UnicodeString &source,
|
| + const RuleBasedCollator *coll,
|
| + UErrorCode &status)
|
| + : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {
|
| + setText(source, status);
|
| }
|
|
|
| /**
|
| @@ -321,168 +337,134 @@ CollationElementIterator::CollationElementIterator(
|
| * the source text using the specified collator
|
| */
|
| CollationElementIterator::CollationElementIterator(
|
| - const CharacterIterator& sourceText,
|
| - const RuleBasedCollator* order,
|
| - UErrorCode& status)
|
| - : isDataOwned_(TRUE)
|
| -{
|
| - if (U_FAILURE(status))
|
| - return;
|
| -
|
| - // **** should I just drop this test? ****
|
| - /*
|
| - if ( sourceText.endIndex() != 0 )
|
| - {
|
| - // A CollationElementIterator is really a two-layered beast.
|
| - // Internally it uses a Normalizer to munge the source text into a form
|
| - // where all "composed" Unicode characters (such as \u00FC) are split into a
|
| - // normal character and a combining accent character.
|
| - // Afterward, CollationElementIterator does its own processing to handle
|
| - // expanding and contracting collation sequences, ignorables, and so on.
|
| -
|
| - Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
|
| - ? Normalizer::NO_OP : order->getDecomposition();
|
| -
|
| - text = new Normalizer(sourceText, decomp);
|
| - if (text == NULL)
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - }
|
| - */
|
| - int32_t length = sourceText.getLength();
|
| - UChar *buffer;
|
| - if (length > 0) {
|
| - buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
|
| - /* test for NULL */
|
| - if (buffer == NULL) {
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| - /*
|
| - Using this constructor will prevent buffer from being removed when
|
| - string gets removed
|
| - */
|
| - UnicodeString string(buffer, length, length);
|
| - ((CharacterIterator &)sourceText).getText(string);
|
| - const UChar *temp = string.getBuffer();
|
| - u_memcpy(buffer, temp, length);
|
| - }
|
| - else {
|
| - buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
|
| - /* test for NULL */
|
| - if (buffer == NULL) {
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| - *buffer = 0;
|
| - }
|
| - m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
|
| -
|
| - /* Test for buffer overflows */
|
| - if (U_FAILURE(status)) {
|
| - return;
|
| - }
|
| - m_data_->isWritable = TRUE;
|
| + const CharacterIterator &source,
|
| + const RuleBasedCollator *coll,
|
| + UErrorCode &status)
|
| + : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {
|
| + // We only call source.getText() which should be const anyway.
|
| + setText(const_cast<CharacterIterator &>(source), status);
|
| }
|
|
|
| -/* CollationElementIterator protected methods ----------------------------- */
|
| +/* CollationElementIterator private methods -------------------------------- */
|
|
|
| const CollationElementIterator& CollationElementIterator::operator=(
|
| const CollationElementIterator& other)
|
| {
|
| - if (this != &other)
|
| - {
|
| - UCollationElements *ucolelem = this->m_data_;
|
| - UCollationElements *otherucolelem = other.m_data_;
|
| - collIterate *coliter = &(ucolelem->iteratordata_);
|
| - collIterate *othercoliter = &(otherucolelem->iteratordata_);
|
| - int length = 0;
|
| -
|
| - // checking only UCOL_ITER_HASLEN is not enough here as we may be in
|
| - // the normalization buffer
|
| - length = (int)(othercoliter->endp - othercoliter->string);
|
| -
|
| - ucolelem->reset_ = otherucolelem->reset_;
|
| - ucolelem->isWritable = TRUE;
|
| -
|
| - /* create a duplicate of string */
|
| - if (length > 0) {
|
| - coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
|
| - if(coliter->string != NULL) {
|
| - uprv_memcpy((UChar *)coliter->string, othercoliter->string,
|
| - length * U_SIZEOF_UCHAR);
|
| - } else { // Error: couldn't allocate memory. No copying should be done
|
| - length = 0;
|
| - }
|
| + if (this == &other) {
|
| + return *this;
|
| + }
|
| +
|
| + CollationIterator *newIter;
|
| + const FCDUTF16CollationIterator *otherFCDIter =
|
| + dynamic_cast<const FCDUTF16CollationIterator *>(other.iter_);
|
| + if(otherFCDIter != NULL) {
|
| + newIter = new FCDUTF16CollationIterator(*otherFCDIter, string_.getBuffer());
|
| + } else {
|
| + const UTF16CollationIterator *otherIter =
|
| + dynamic_cast<const UTF16CollationIterator *>(other.iter_);
|
| + if(otherIter != NULL) {
|
| + newIter = new UTF16CollationIterator(*otherIter, string_.getBuffer());
|
| + } else {
|
| + newIter = NULL;
|
| }
|
| - else {
|
| - coliter->string = NULL;
|
| + }
|
| + if(newIter != NULL) {
|
| + delete iter_;
|
| + iter_ = newIter;
|
| + rbc_ = other.rbc_;
|
| + otherHalf_ = other.otherHalf_;
|
| + dir_ = other.dir_;
|
| +
|
| + string_ = other.string_;
|
| + }
|
| + if(other.dir_ < 0 && other.offsets_ != NULL && !other.offsets_->isEmpty()) {
|
| + UErrorCode errorCode = U_ZERO_ERROR;
|
| + if(offsets_ == NULL) {
|
| + offsets_ = new UVector32(other.offsets_->size(), errorCode);
|
| }
|
| -
|
| - /* start and end of string */
|
| - coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;
|
| -
|
| - /* handle writable buffer here */
|
| -
|
| - if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
|
| - coliter->writableBuffer = othercoliter->writableBuffer;
|
| - coliter->writableBuffer.getTerminatedBuffer();
|
| + if(offsets_ != NULL) {
|
| + offsets_->assign(*other.offsets_, errorCode);
|
| }
|
| + }
|
| + return *this;
|
| +}
|
| +
|
| +namespace {
|
|
|
| - /* current position */
|
| - if (othercoliter->pos >= othercoliter->string &&
|
| - othercoliter->pos <= othercoliter->endp)
|
| - {
|
| - U_ASSERT(coliter->string != NULL);
|
| - coliter->pos = coliter->string +
|
| - (othercoliter->pos - othercoliter->string);
|
| +class MaxExpSink : public ContractionsAndExpansions::CESink {
|
| +public:
|
| + MaxExpSink(UHashtable *h, UErrorCode &ec) : maxExpansions(h), errorCode(ec) {}
|
| + virtual ~MaxExpSink();
|
| + virtual void handleCE(int64_t /*ce*/) {}
|
| + virtual void handleExpansion(const int64_t ces[], int32_t length) {
|
| + if (length <= 1) {
|
| + // We do not need to add single CEs into the map.
|
| + return;
|
| }
|
| - else {
|
| - coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
|
| - (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
|
| + int32_t count = 0; // number of CE "halves"
|
| + for (int32_t i = 0; i < length; ++i) {
|
| + count += ceNeedsTwoParts(ces[i]) ? 2 : 1;
|
| }
|
| -
|
| - /* CE buffer */
|
| - int32_t CEsize;
|
| - if (coliter->extendCEs) {
|
| - uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
|
| - CEsize = sizeof(othercoliter->extendCEs);
|
| - if (CEsize > 0) {
|
| - othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
|
| - uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
|
| - }
|
| - coliter->toReturn = coliter->extendCEs +
|
| - (othercoliter->toReturn - othercoliter->extendCEs);
|
| - coliter->CEpos = coliter->extendCEs + CEsize;
|
| + // last "half" of the last CE
|
| + int64_t ce = ces[length - 1];
|
| + uint32_t p = (uint32_t)(ce >> 32);
|
| + uint32_t lower32 = (uint32_t)ce;
|
| + uint32_t lastHalf = getSecondHalf(p, lower32);
|
| + if (lastHalf == 0) {
|
| + lastHalf = getFirstHalf(p, lower32);
|
| + U_ASSERT(lastHalf != 0);
|
| } else {
|
| - CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
|
| - if (CEsize > 0) {
|
| - uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
|
| - }
|
| - coliter->toReturn = coliter->CEs +
|
| - (othercoliter->toReturn - othercoliter->CEs);
|
| - coliter->CEpos = coliter->CEs + CEsize;
|
| + lastHalf |= 0xc0; // old-style continuation CE
|
| }
|
| -
|
| - if (othercoliter->fcdPosition != NULL) {
|
| - U_ASSERT(coliter->string != NULL);
|
| - coliter->fcdPosition = coliter->string +
|
| - (othercoliter->fcdPosition
|
| - - othercoliter->string);
|
| - }
|
| - else {
|
| - coliter->fcdPosition = NULL;
|
| + if (count > uhash_igeti(maxExpansions, (int32_t)lastHalf)) {
|
| + uhash_iputi(maxExpansions, (int32_t)lastHalf, count, &errorCode);
|
| }
|
| - coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
|
| - coliter->origFlags = othercoliter->origFlags;
|
| - coliter->coll = othercoliter->coll;
|
| - this->isDataOwned_ = TRUE;
|
| }
|
|
|
| - return *this;
|
| +private:
|
| + UHashtable *maxExpansions;
|
| + UErrorCode &errorCode;
|
| +};
|
| +
|
| +MaxExpSink::~MaxExpSink() {}
|
| +
|
| +} // namespace
|
| +
|
| +UHashtable *
|
| +CollationElementIterator::computeMaxExpansions(const CollationData *data, UErrorCode &errorCode) {
|
| + if (U_FAILURE(errorCode)) { return NULL; }
|
| + UHashtable *maxExpansions = uhash_open(uhash_hashLong, uhash_compareLong,
|
| + uhash_compareLong, &errorCode);
|
| + if (U_FAILURE(errorCode)) { return NULL; }
|
| + MaxExpSink sink(maxExpansions, errorCode);
|
| + ContractionsAndExpansions(NULL, NULL, &sink, TRUE).forData(data, errorCode);
|
| + if (U_FAILURE(errorCode)) {
|
| + uhash_close(maxExpansions);
|
| + return NULL;
|
| + }
|
| + return maxExpansions;
|
| +}
|
| +
|
| +int32_t
|
| +CollationElementIterator::getMaxExpansion(int32_t order) const {
|
| + return getMaxExpansion(rbc_->tailoring->maxExpansions, order);
|
| +}
|
| +
|
| +int32_t
|
| +CollationElementIterator::getMaxExpansion(const UHashtable *maxExpansions, int32_t order) {
|
| + if (order == 0) { return 1; }
|
| + int32_t max;
|
| + if(maxExpansions != NULL && (max = uhash_igeti(maxExpansions, order)) != 0) {
|
| + return max;
|
| + }
|
| + if ((order & 0xc0) == 0xc0) {
|
| + // old-style continuation CE
|
| + return 2;
|
| + } else {
|
| + return 1;
|
| + }
|
| }
|
|
|
| U_NAMESPACE_END
|
|
|
| #endif /* #if !UCONFIG_NO_COLLATION */
|
| -
|
| -/* eof */
|
|
|