source/i18n/collationdata.cpp - Issue 845603002: Update ICU to 54.1 step 1

Unified Diff: source/i18n/collationdata.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unusued directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/i18n/collationdata.cpp

diff --git a/source/i18n/collationdata.cpp b/source/i18n/collationdata.cpp

new file mode 100644

index 0000000000000000000000000000000000000000..42b5bedb5ab57e0ba22987cb1c5186d5bdd25e3a

--- /dev/null

+++ b/source/i18n/collationdata.cpp

@@ -0,0 +1,293 @@

+/*

+*******************************************************************************

+* collationdata.cpp

+* created on: 2012jul28

+* created by: Markus W. Scherer

+*/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_COLLATION

+#include "unicode/ucol.h"

+#include "unicode/udata.h"

+#include "unicode/uscript.h"

+#include "cmemory.h"

+#include "collation.h"

+#include "collationdata.h"

+#include "uassert.h"

+#include "utrie2.h"

+U_NAMESPACE_BEGIN

+uint32_t

+CollationData::getIndirectCE32(uint32_t ce32) const {

+ U_ASSERT(Collation::isSpecialCE32(ce32));

+ int32_t tag = Collation::tagFromCE32(ce32);

+ if(tag == Collation::DIGIT_TAG) {

+ // Fetch the non-numeric-collation CE32.

+ ce32 = ce32s[Collation::indexFromCE32(ce32)];

+ } else if(tag == Collation::LEAD_SURROGATE_TAG) {

+ ce32 = Collation::UNASSIGNED_CE32;

+ } else if(tag == Collation::U0000_TAG) {

+ // Fetch the normal ce32 for U+0000.

+ ce32 = ce32s[0];

+ }

+ return ce32;

+uint32_t

+CollationData::getFinalCE32(uint32_t ce32) const {

+ if(Collation::isSpecialCE32(ce32)) {

+ ce32 = getIndirectCE32(ce32);

+ }

+ return ce32;

+int64_t

+CollationData::getSingleCE(UChar32 c, UErrorCode &errorCode) const {

+ if(U_FAILURE(errorCode)) { return 0; }

+ // Keep parallel with CollationDataBuilder::getSingleCE().

+ const CollationData *d;

+ uint32_t ce32 = getCE32(c);

+ if(ce32 == Collation::FALLBACK_CE32) {

+ d = base;

+ ce32 = base->getCE32(c);

+ } else {

+ d = this;

+ }

+ while(Collation::isSpecialCE32(ce32)) {

+ switch(Collation::tagFromCE32(ce32)) {

+ case Collation::LATIN_EXPANSION_TAG:

+ case Collation::BUILDER_DATA_TAG:

+ case Collation::PREFIX_TAG:

+ case Collation::CONTRACTION_TAG:

+ case Collation::HANGUL_TAG:

+ case Collation::LEAD_SURROGATE_TAG:

+ errorCode = U_UNSUPPORTED_ERROR;

+ return 0;

+ case Collation::FALLBACK_TAG:

+ case Collation::RESERVED_TAG_3:

+ errorCode = U_INTERNAL_PROGRAM_ERROR;

+ return 0;

+ case Collation::LONG_PRIMARY_TAG:

+ return Collation::ceFromLongPrimaryCE32(ce32);

+ case Collation::LONG_SECONDARY_TAG:

+ return Collation::ceFromLongSecondaryCE32(ce32);

+ case Collation::EXPANSION32_TAG:

+ if(Collation::lengthFromCE32(ce32) == 1) {

+ ce32 = d->ce32s[Collation::indexFromCE32(ce32)];

+ break;

+ } else {

+ errorCode = U_UNSUPPORTED_ERROR;

+ return 0;

+ }

+ case Collation::EXPANSION_TAG: {

+ if(Collation::lengthFromCE32(ce32) == 1) {

+ return d->ces[Collation::indexFromCE32(ce32)];

+ } else {

+ errorCode = U_UNSUPPORTED_ERROR;

+ return 0;

+ }

+ case Collation::DIGIT_TAG:

+ // Fetch the non-numeric-collation CE32 and continue.

+ ce32 = d->ce32s[Collation::indexFromCE32(ce32)];

+ break;

+ case Collation::U0000_TAG:

+ U_ASSERT(c == 0);

+ // Fetch the normal ce32 for U+0000 and continue.

+ ce32 = d->ce32s[0];

+ break;

+ case Collation::OFFSET_TAG:

+ return d->getCEFromOffsetCE32(c, ce32);

+ case Collation::IMPLICIT_TAG:

+ return Collation::unassignedCEFromCodePoint(c);

+ }

+ return Collation::ceFromSimpleCE32(ce32);

+uint32_t

+CollationData::getFirstPrimaryForGroup(int32_t script) const {

+ int32_t index = findScript(script);

+ if(index < 0) {

+ return 0;

+ }

+ uint32_t head = scripts[index];

+ return (head & 0xff00) << 16;

+uint32_t

+CollationData::getLastPrimaryForGroup(int32_t script) const {

+ int32_t index = findScript(script);

+ if(index < 0) {

+ return 0;

+ }

+ uint32_t head = scripts[index];

+ uint32_t lastByte = head & 0xff;

+ return ((lastByte + 1) << 24) - 1;

+int32_t

+CollationData::getGroupForPrimary(uint32_t p) const {

+ p >>= 24; // Reordering groups are distinguished by primary lead bytes.

+ for(int32_t i = 0; i < scriptsLength; i = i + 2 + scripts[i + 1]) {

+ uint32_t lastByte = scripts[i] & 0xff;

+ if(p <= lastByte) {

+ return scripts[i + 2];

+ }

+ return -1;

+int32_t

+CollationData::findScript(int32_t script) const {

+ if(script < 0 || 0xffff < script) { return -1; }

+ for(int32_t i = 0; i < scriptsLength;) {

+ int32_t limit = i + 2 + scripts[i + 1];

+ for(int32_t j = i + 2; j < limit; ++j) {

+ if(script == scripts[j]) { return i; }

+ }

+ i = limit;

+ }

+ return -1;

+int32_t

+CollationData::getEquivalentScripts(int32_t script,

+ int32_t dest[], int32_t capacity,

+ UErrorCode &errorCode) const {

+ if(U_FAILURE(errorCode)) { return 0; }

+ int32_t i = findScript(script);

+ if(i < 0) { return 0; }

+ int32_t length = scripts[i + 1];

+ U_ASSERT(length != 0);

+ if(length > capacity) {

+ errorCode = U_BUFFER_OVERFLOW_ERROR;

+ return length;

+ }

+ i += 2;

+ dest[0] = scripts[i++];

+ for(int32_t j = 1; j < length; ++j) {

+ script = scripts[i++];

+ // Sorted insertion.

+ for(int32_t k = j;; --k) {

+ // Invariant: dest[k] is free to receive either script or dest[k - 1].

+ if(k > 0 && script < dest[k - 1]) {

+ dest[k] = dest[k - 1];

+ } else {

+ dest[k] = script;

+ break;

+ }

+ return length;

+void

+CollationData::makeReorderTable(const int32_t *reorder, int32_t length,

+ uint8_t table[256], UErrorCode &errorCode) const {

+ if(U_FAILURE(errorCode)) { return; }

+ // Initialize the table.

+ // Never reorder special low and high primary lead bytes.

+ int32_t lowByte;

+ for(lowByte = 0; lowByte <= Collation::MERGE_SEPARATOR_BYTE; ++lowByte) {

+ table[lowByte] = lowByte;

+ }

+ // lowByte == 03

+ int32_t highByte;

+ for(highByte = 0xff; highByte >= Collation::TRAIL_WEIGHT_BYTE; --highByte) {

+ table[highByte] = highByte;

+ }

+ // highByte == FE

+ // Set intermediate bytes to 0 to indicate that they have not been set yet.

+ for(int32_t i = lowByte; i <= highByte; ++i) {

+ table[i] = 0;

+ }

+ // Get the set of special reorder codes in the input list.

+ // This supports up to 32 special reorder codes;

+ // it works for data with codes beyond UCOL_REORDER_CODE_LIMIT.

+ uint32_t specials = 0;

+ for(int32_t i = 0; i < length; ++i) {

+ int32_t reorderCode = reorder[i] - UCOL_REORDER_CODE_FIRST;

+ if(0 <= reorderCode && reorderCode <= 31) {

+ specials |= (uint32_t)1 << reorderCode;

+ }

+ // Start the reordering with the special low reorder codes that do not occur in the input.

+ for(int32_t i = 0;; i += 3) {

+ if(scripts[i + 1] != 1) { break; } // Went beyond special single-code reorder codes.

+ int32_t reorderCode = (int32_t)scripts[i + 2] - UCOL_REORDER_CODE_FIRST;

+ if(reorderCode < 0) { break; } // Went beyond special reorder codes.

+ if((specials & ((uint32_t)1 << reorderCode)) == 0) {

+ int32_t head = scripts[i];

+ int32_t firstByte = head >> 8;

+ int32_t lastByte = head & 0xff;

+ do { table[firstByte++] = lowByte++; } while(firstByte <= lastByte);

+ }

+ // Reorder according to the input scripts, continuing from the bottom of the bytes range.

+ for(int32_t i = 0; i < length;) {

+ int32_t script = reorder[i++];

+ if(script == USCRIPT_UNKNOWN) {

+ // Put the remaining scripts at the top.

+ while(i < length) {

+ script = reorder[--length];

+ if(script == USCRIPT_UNKNOWN || // Must occur at most once.

+ script == UCOL_REORDER_CODE_DEFAULT) {

+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ int32_t index = findScript(script);

+ if(index < 0) { continue; }

+ int32_t head = scripts[index];

+ int32_t firstByte = head >> 8;

+ int32_t lastByte = head & 0xff;

+ if(table[firstByte] != 0) { // Duplicate or equivalent script.

+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ do { table[lastByte--] = highByte--; } while(firstByte <= lastByte);

+ }

+ break;

+ }

+ if(script == UCOL_REORDER_CODE_DEFAULT) {

+ // The default code must be the only one in the list, and that is handled by the caller.

+ // Otherwise it must not be used.

+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ int32_t index = findScript(script);

+ if(index < 0) { continue; }

+ int32_t head = scripts[index];

+ int32_t firstByte = head >> 8;

+ int32_t lastByte = head & 0xff;

+ if(table[firstByte] != 0) { // Duplicate or equivalent script.

+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ do { table[firstByte++] = lowByte++; } while(firstByte <= lastByte);

+ }

+ // Put all remaining scripts into the middle.

+ // Avoid table[0] which must remain 0.

+ for(int32_t i = 1; i <= 0xff; ++i) {

+ if(table[i] == 0) { table[i] = lowByte++; }

+ }

+ U_ASSERT(lowByte == highByte + 1);

+U_NAMESPACE_END

+#endif // !UCONFIG_NO_COLLATION

« no previous file with comments | « source/i18n/collationdata.h ('k') | source/i18n/collationdatabuilder.h » ('j') | no next file with comments »