source/common/normalizer2impl.cpp - Issue 845603002: Update ICU to 54.1 step 1

Unified Diff: source/common/normalizer2impl.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unused directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/common/normalizer2impl.cpp

diff --git a/source/common/normalizer2impl.cpp b/source/common/normalizer2impl.cpp

index 9c00c1c818d12aff921fe62a82a14b6517ff527b..ec4809c4630995c13a4bedbba3ca49b60be755aa 100644

--- a/source/common/normalizer2impl.cpp

+++ b/source/common/normalizer2impl.cpp

@@ -1,7 +1,7 @@

*******************************************************************************

@@ -253,50 +253,12 @@ struct CanonIterData : public UMemory {

};

Normalizer2Impl::~Normalizer2Impl() {

- udata_close(memory);

- utrie2_close(normTrie);

delete fCanonIterData;

}

-UBool U_CALLCONV

-Normalizer2Impl::isAcceptable(void *context,

- const char * /* type */, const char * /*name*/,

- const UDataInfo *pInfo) {

- if(

- pInfo->size>=20 &&

- pInfo->isBigEndian==U_IS_BIG_ENDIAN &&

- pInfo->charsetFamily==U_CHARSET_FAMILY &&

- pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */

- pInfo->dataFormat[1]==0x72 &&

- pInfo->dataFormat[2]==0x6d &&

- pInfo->dataFormat[3]==0x32 &&

- pInfo->formatVersion[0]==2

- ) {

- Normalizer2Impl *me=(Normalizer2Impl *)context;

- uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);

- return TRUE;

- } else {

- return FALSE;

- }

void

-Normalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {

- if(U_FAILURE(errorCode)) {

- return;

- }

- memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);

- if(U_FAILURE(errorCode)) {

- return;

- }

- const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);

- const int32_t *inIndexes=(const int32_t *)inBytes;

- int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;

- if(indexesLength<=IX_MIN_MAYBE_YES) {

- errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.

- return;

- }

+Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie,

+ const uint16_t *inExtraData, const uint8_t *inSmallFCD) {

minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];

minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];

@@ -306,23 +268,12 @@ Normalizer2Impl::load(const char *packageName, const char *name, UErrorCode &err

limitNoNo=inIndexes[IX_LIMIT_NO_NO];

minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];

- int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];

- int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];

- normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,

- inBytes+offset, nextOffset-offset, NULL,

- &errorCode);

- if(U_FAILURE(errorCode)) {

- return;

- }

+ normTrie=inTrie;

- offset=nextOffset;

- nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];

- maybeYesCompositions=(const uint16_t *)(inBytes+offset);

+ maybeYesCompositions=inExtraData;

extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes);

- // smallFCD: new in formatVersion 2

- offset=nextOffset;

- smallFCD=inBytes+offset;

+ smallFCD=inSmallFCD;

// Build tccc180[].

// gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.

@@ -357,9 +308,71 @@ uint8_t Normalizer2Impl::getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, co

}

+namespace {

+class LcccContext {

+public:

+ LcccContext(const Normalizer2Impl &ni, UnicodeSet &s) : impl(ni), set(s) {}

+ void handleRange(UChar32 start, UChar32 end, uint16_t norm16) {

+ if(impl.isAlgorithmicNoNo(norm16)) {

+ // Range of code points with same-norm16-value algorithmic decompositions.

+ // They might have different non-zero FCD16 values.

+ do {

+ uint16_t fcd16=impl.getFCD16(start);

+ if(fcd16>0xff) { set.add(start); }

+ } while(++start<=end);

+ } else {

+ uint16_t fcd16=impl.getFCD16(start);

+ if(fcd16>0xff) { set.add(start, end); }

+ }

+private:

+ const Normalizer2Impl &impl;

+ UnicodeSet &set;

+};

+struct PropertyStartsContext {

+ PropertyStartsContext(const Normalizer2Impl &ni, const USetAdder *adder)

+ : impl(ni), sa(adder) {}

+ const Normalizer2Impl &impl;

+ const USetAdder *sa;

+};

+} // namespace

U_CDECL_BEGIN

static UBool U_CALLCONV

+enumLcccRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {

+ ((LcccContext *)context)->handleRange(start, end, (uint16_t)value);

+ return TRUE;

+static UBool U_CALLCONV

+enumNorm16PropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {

+ /* add the start code point to the USet */

+ const PropertyStartsContext *ctx=(const PropertyStartsContext *)context;

+ const USetAdder *sa=ctx->sa;

+ sa->add(sa->set, start);

+ if(start!=end && ctx->impl.isAlgorithmicNoNo((uint16_t)value)) {

+ // Range of code points with same-norm16-value algorithmic decompositions.

+ // They might have different non-zero FCD16 values.

+ uint16_t prevFCD16=ctx->impl.getFCD16(start);

+ while(++start<=end) {

+ uint16_t fcd16=ctx->impl.getFCD16(start);

+ if(fcd16!=prevFCD16) {

+ sa->add(sa->set, start);

+ prevFCD16=fcd16;

+ }

+ return TRUE;

+static UBool U_CALLCONV

enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {

/* add the start code point to the USet */

const USetAdder *sa=(const USetAdder *)context;

@@ -375,9 +388,17 @@ segmentStarterMapper(const void * /*context*/, uint32_t value) {

U_CDECL_END

void

+Normalizer2Impl::addLcccChars(UnicodeSet &set) const {

+ /* add the start code point of each same-value range of each trie */

+ LcccContext context(*this, set);

+ utrie2_enum(normTrie, NULL, enumLcccRange, &context);

+void

Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {

/* add the start code point of each same-value range of each trie */

- utrie2_enum(normTrie, NULL, enumPropertyStartsRange, sa);

+ PropertyStartsContext context(*this, sa);

+ utrie2_enum(normTrie, NULL, enumNorm16PropertyStartsRange, &context);

/* add Hangul LV syllables and LV+1 because of skippables */

for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {

@@ -419,6 +440,38 @@ Normalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src,

return src;

}

+UnicodeString &

+Normalizer2Impl::decompose(const UnicodeString &src, UnicodeString &dest,

+ UErrorCode &errorCode) const {

+ if(U_FAILURE(errorCode)) {

+ dest.setToBogus();

+ return dest;

+ }

+ const UChar *sArray=src.getBuffer();

+ if(&dest==&src || sArray==NULL) {

+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ dest.setToBogus();

+ return dest;

+ }

+ decompose(sArray, sArray+src.length(), dest, src.length(), errorCode);

+ return dest;

+void

+Normalizer2Impl::decompose(const UChar *src, const UChar *limit,

+ UnicodeString &dest,

+ int32_t destLengthEstimate,

+ UErrorCode &errorCode) const {

+ if(destLengthEstimate<0 && limit!=NULL) {

+ destLengthEstimate=(int32_t)(limit-src);

+ }

+ dest.remove();

+ ReorderingBuffer buffer(*this, dest);

+ if(buffer.init(destLengthEstimate, errorCode)) {

+ decompose(src, limit, &buffer, errorCode);

+ }

// Dual functionality:

// buffer!=NULL: normalize

// buffer==NULL: isNormalized/spanQuickCheckYes

« no previous file with comments | « source/common/normalizer2impl.h ('k') | source/common/propname.cpp » ('j') | no next file with comments »