icu52/patches/search_collation.patch - Issue 224943002: icu local change part1

Unified Diff: icu52/patches/search_collation.patch

Issue 224943002: icu local change part1 (Closed). Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/

Patch Set: "function indentation changed". Created 6 years, 8 months ago.

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: icu52/patches/search_collation.patch

===================================================================

--- icu52/patches/search_collation.patch (revision 261238)

+++ icu52/patches/search_collation.patch (working copy)

@@ -1,1083 +0,0 @@

-Index: source/test/cintltst/usrchtst.c

-===================================================================

---- source/test/cintltst/usrchtst.c (revision 75773)

-+++ source/test/cintltst/usrchtst.c (working copy)

-@@ -1,5 +1,5 @@

- /********************************************************************

- ********************************************************************

- * File usrchtst.c

-@@ -2553,7 +2553,173 @@

- ucol_close(coll);

- }

-+/**

-+* TestUsingSearchCollator

-+*/

-+#define ARRAY_LENGTH(array) (sizeof(array)/sizeof(array[0]))

-+typedef struct {

-+ const UChar * pattern;

-+ const int32_t * offsets;

-+ int32_t offsetsLen;

-+} PatternAndOffsets;

-+static const UChar scKoText[] = {

-+ 0x0020,

-+/*01*/ 0xAC00, 0x0020, /* simple LV Hangul */

-+/*03*/ 0xAC01, 0x0020, /* simple LVT Hangul */

-+/*05*/ 0xAC0F, 0x0020, /* LVTT, last jamo expands for search */

-+/*07*/ 0xAFFF, 0x0020, /* LLVVVTT, every jamo expands for search */

-+/*09*/ 0x1100, 0x1161, 0x11A8, 0x0020, /* 0xAC01 as conjoining jamo */

-+/*13*/ 0x1100, 0x1161, 0x1100, 0x0020, /* 0xAC01 as basic conjoining jamo (per search rules) */

-+/*17*/ 0x3131, 0x314F, 0x3131, 0x0020, /* 0xAC01 as compatibility jamo */

-+/*21*/ 0x1100, 0x1161, 0x11B6, 0x0020, /* 0xAC0F as conjoining jamo; last expands for search */

-+/*25*/ 0x1100, 0x1161, 0x1105, 0x1112, 0x0020, /* 0xAC0F as basic conjoining jamo; last expands for search */

-+/*30*/ 0x1101, 0x1170, 0x11B6, 0x0020, /* 0xAFFF as conjoining jamo; all expand for search */

-+/*34*/ 0x00E6, 0x0020, /* small letter ae, expands */

-+/*36*/ 0x1E4D, 0x0020, /* small letter o with tilde and acute, decomposes */

-+ 0

-+};

-+static const UChar scKoPat0[] = { 0xAC01, 0 };

-+static const UChar scKoPat1[] = { 0x1100, 0x1161, 0x11A8, 0 }; /* 0xAC01 as conjoining jamo */

-+static const UChar scKoPat2[] = { 0xAC0F, 0 };

-+static const UChar scKoPat3[] = { 0x1100, 0x1161, 0x1105, 0x1112, 0 }; /* 0xAC0F as basic conjoining jamo */

-+static const UChar scKoPat4[] = { 0xAFFF, 0 };

-+static const UChar scKoPat5[] = { 0x1101, 0x1170, 0x11B6, 0 }; /* 0xAFFF as conjoining jamo */

-+static const int32_t scKoSrchOff01[] = { 3, 9, 13 };

-+static const int32_t scKoSrchOff23[] = { 5, 21, 25 };

-+static const int32_t scKoSrchOff45[] = { 7, 30 };

-+static const PatternAndOffsets scKoSrchPatternsOffsets[] = {

-+ { scKoPat0, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },

-+ { scKoPat1, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },

-+ { scKoPat2, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },

-+ { scKoPat3, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },

-+ { scKoPat4, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },

-+ { scKoPat5, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },

-+ { NULL, NULL, 0 }

-+};

-+static const int32_t scKoStndOff01[] = { 3, 9 };

-+static const int32_t scKoStndOff2[] = { 5, 21 };

-+static const int32_t scKoStndOff3[] = { 25 };

-+static const int32_t scKoStndOff45[] = { 7, 30 };

-+static const PatternAndOffsets scKoStndPatternsOffsets[] = {

-+ { scKoPat0, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },

-+ { scKoPat1, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },

-+ { scKoPat2, scKoStndOff2, ARRAY_LENGTH(scKoStndOff2) },

-+ { scKoPat3, scKoStndOff3, ARRAY_LENGTH(scKoStndOff3) },

-+ { scKoPat4, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },

-+ { scKoPat5, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },

-+ { NULL, NULL, 0 }

-+};

-+typedef struct {

-+ const char * locale;

-+ const UChar * text;

-+ const PatternAndOffsets * patternsAndOffsets;

-+} TUSCItem;

-+static const TUSCItem tuscItems[] = {

-+ { "root", scKoText, scKoStndPatternsOffsets },

-+ { "root@collation=search", scKoText, scKoSrchPatternsOffsets },

-+ { "ko@collation=search", scKoText, scKoSrchPatternsOffsets },

-+ { NULL, NULL, NULL }

-+};

-+static const UChar dummyPat[] = { 0x0061, 0 };

-+static void TestUsingSearchCollator(void)

-+{

-+ const TUSCItem * tuscItemPtr;

-+ for (tuscItemPtr = tuscItems; tuscItemPtr->locale != NULL; tuscItemPtr++) {

-+ UErrorCode status = U_ZERO_ERROR;

-+ UCollator* ucol = ucol_open(tuscItemPtr->locale, &status);

-+ if ( U_SUCCESS(status) ) {

-+ UStringSearch* usrch = usearch_openFromCollator(dummyPat, -1, tuscItemPtr->text, -1, ucol, NULL, &status);

-+ if ( U_SUCCESS(status) ) {

-+ const PatternAndOffsets * patternsOffsetsPtr;

-+ for ( patternsOffsetsPtr = tuscItemPtr->patternsAndOffsets; patternsOffsetsPtr->pattern != NULL; patternsOffsetsPtr++) {

-+ usearch_setPattern(usrch, patternsOffsetsPtr->pattern, -1, &status);

-+ if ( U_SUCCESS(status) ) {

-+ int32_t offset;

-+ const int32_t * nextOffsetPtr;

-+ const int32_t * limitOffsetPtr;

-+ usearch_reset(usrch);

-+ nextOffsetPtr = patternsOffsetsPtr->offsets;

-+ limitOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen;

-+ while (TRUE) {

-+ offset = usearch_next(usrch, &status);

-+ if ( U_FAILURE(status) || offset == USEARCH_DONE ) {

-+ break;

-+ }

-+ if ( nextOffsetPtr < limitOffsetPtr ) {

-+ if (offset != *nextOffsetPtr) {

-+ log_err("error, locale %s, expected usearch_next %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);

-+ nextOffsetPtr = limitOffsetPtr;

-+ break;

-+ }

-+ nextOffsetPtr++;

-+ } else {

-+ log_err("error, locale %s, usearch_next returned more matches than expected\n", tuscItemPtr->locale );

-+ }

-+ if ( U_FAILURE(status) ) {

-+ log_err("error, locale %s, usearch_next failed: %s\n", tuscItemPtr->locale, u_errorName(status) );

-+ } else if ( nextOffsetPtr < limitOffsetPtr ) {

-+ log_err("error, locale %s, usearch_next returned fewer matches than expected\n", tuscItemPtr->locale );

-+ }

-+ status = U_ZERO_ERROR;

-+ usearch_reset(usrch);

-+ nextOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen;

-+ limitOffsetPtr = patternsOffsetsPtr->offsets;

-+ while (TRUE) {

-+ offset = usearch_previous(usrch, &status);

-+ if ( U_FAILURE(status) || offset == USEARCH_DONE ) {

-+ break;

-+ }

-+ if ( nextOffsetPtr > limitOffsetPtr ) {

-+ nextOffsetPtr--;

-+ if (offset != *nextOffsetPtr) {

-+ log_err("error, locale %s, expected usearch_previous %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);

-+ nextOffsetPtr = limitOffsetPtr;

-+ break;

-+ }

-+ } else {

-+ log_err("error, locale %s, usearch_previous returned more matches than expected\n", tuscItemPtr->locale );

-+ }

-+ if ( U_FAILURE(status) ) {

-+ log_err("error, locale %s, usearch_previous failed: %s\n", tuscItemPtr->locale, u_errorName(status) );

-+ } else if ( nextOffsetPtr > limitOffsetPtr ) {

-+ log_err("error, locale %s, usearch_previous returned fewer matches than expected\n", tuscItemPtr->locale );

-+ }

-+ } else {

-+ log_err("error, locale %s, usearch_setPattern failed: %s\n", tuscItemPtr->locale, u_errorName(status) );

-+ }

-+ usearch_close(usrch);

-+ } else {

-+ log_err("error, locale %s, usearch_openFromCollator failed: %s\n", tuscItemPtr->locale, u_errorName(status) );

-+ }

-+ ucol_close(ucol);

-+ } else {

-+ log_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr->locale, u_errorName(status) );

-+ }

-+}

-+/**

-+* addSearchTest

-+*/

- void addSearchTest(TestNode** root)

- {

- addTest(root, &TestStart, "tscoll/usrchtst/TestStart");

-@@ -2608,6 +2774,7 @@

- addTest(root, &TestForwardBackward, "tscoll/usrchtst/TestForwardBackward");

- addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull");

- addTest(root, &TestStrengthIdentical, "tscoll/usrchtst/TestStrengthIdentical");

-+ addTest(root, &TestUsingSearchCollator, "tscoll/usrchtst/TestUsingSearchCollator");

- }

- #endif /* #if !UCONFIG_NO_COLLATION */

-Index: source/test/cintltst/citertst.c

-===================================================================

---- source/test/cintltst/citertst.c (revision 75773)

-+++ source/test/cintltst/citertst.c (working copy)

-@@ -1,6 +1,6 @@

- /********************************************************************

- * COPYRIGHT:

- ********************************************************************/

- /********************************************************************************

-@@ -22,6 +22,7 @@

- #if !UCONFIG_NO_COLLATION

- #include "unicode/ucol.h"

-+#include "unicode/ucoleitr.h"

- #include "unicode/uloc.h"

- #include "unicode/uchar.h"

- #include "unicode/ustring.h"

-@@ -58,6 +59,7 @@

- addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");

- addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");

- addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");

-+ addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");

- }

- /* The locales we support */

-@@ -2017,4 +2019,141 @@

- T_FileStream_close(file);

- }

-+/**

-+* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with

-+* normalization on AND jamo tailoring, among other things.

-+*/

-+static const UChar tsceText[] = { /* Nothing in here should be ignorable */

-+ 0x0020, 0xAC00, /* simple LV Hangul */

-+ 0x0020, 0xAC01, /* simple LVT Hangul */

-+ 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */

-+ 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */

-+ 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */

-+ 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */

-+ 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */

-+ 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */

-+ 0x0020, 0x00E6, /* small letter ae, expands */

-+ 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */

-+ 0x0020

-+};

-+enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };

-+static const int32_t rootStandardOffsets[] = {

-+ 0, 1,2,

-+ 2, 3,4,4,

-+ 4, 5,6,6,

-+ 6, 7,8,8,

-+ 8, 9,10,11,

-+ 12, 13,14,15,

-+ 16, 17,18,19,

-+ 20, 21,22,23,

-+ 24, 25,26,26,26,

-+ 26, 27,28,28,

-+ 28,

-+ 29

-+};

-+enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) };

-+static const int32_t rootSearchOffsets[] = {

-+ 0, 1,2,

-+ 2, 3,4,4,

-+ 4, 5,6,6,6,

-+ 6, 7,8,8,8,8,8,8,

-+ 8, 9,10,11,

-+ 12, 13,14,15,

-+ 16, 17,18,19,20,

-+ 20, 21,22,22,23,23,23,24,

-+ 24, 25,26,26,26,

-+ 26, 27,28,28,

-+ 28,

-+ 29

-+};

-+enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) };

-+typedef struct {

-+ const char * locale;

-+ const int32_t * offsets;

-+ int32_t offsetsLen;

-+} TSCEItem;

-+static const TSCEItem tsceItems[] = {

-+ { "root", rootStandardOffsets, kLen_rootStandardOffsets },

-+ { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets },

-+ { NULL, NULL, 0 }

-+};

-+static void TestSearchCollatorElements(void)

-+{

-+ const TSCEItem * tsceItemPtr;

-+ for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {

-+ UErrorCode status = U_ZERO_ERROR;

-+ UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);

-+ if ( U_SUCCESS(status) ) {

-+ UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);

-+ if ( U_SUCCESS(status) ) {

-+ int32_t offset, element;

-+ const int32_t * nextOffsetPtr;

-+ const int32_t * limitOffsetPtr;

-+ nextOffsetPtr = tsceItemPtr->offsets;

-+ limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;

-+ do {

-+ offset = ucol_getOffset(uce);

-+ element = ucol_next(uce, &status);

-+ if ( element == 0 ) {

-+ log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );

-+ }

-+ if ( nextOffsetPtr < limitOffsetPtr ) {

-+ if (offset != *nextOffsetPtr) {

-+ log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",

-+ tsceItemPtr->locale, *nextOffsetPtr, offset );

-+ nextOffsetPtr = limitOffsetPtr;

-+ break;

-+ }

-+ nextOffsetPtr++;

-+ } else {

-+ log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );

-+ }

-+ } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );

-+ if ( nextOffsetPtr < limitOffsetPtr ) {

-+ log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );

-+ }

-+ ucol_setOffset(uce, kLen_tsceText, &status);

-+ status = U_ZERO_ERROR;

-+ nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;

-+ limitOffsetPtr = tsceItemPtr->offsets;

-+ do {

-+ offset = ucol_getOffset(uce);

-+ element = ucol_previous(uce, &status);

-+ if ( element == 0 ) {

-+ log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );

-+ }

-+ if ( nextOffsetPtr > limitOffsetPtr ) {

-+ nextOffsetPtr--;

-+ if (offset != *nextOffsetPtr) {

-+ log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",

-+ tsceItemPtr->locale, *nextOffsetPtr, offset );

-+ nextOffsetPtr = limitOffsetPtr;

-+ break;

-+ }

-+ } else {

-+ log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );

-+ }

-+ } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );

-+ if ( nextOffsetPtr > limitOffsetPtr ) {

-+ log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );

-+ }

-+ ucol_closeElements(uce);

-+ } else {

-+ log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );

-+ }

-+ ucol_close(ucol);

-+ } else {

-+ log_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );

-+ }

-+}

- #endif /* #if !UCONFIG_NO_COLLATION */

-Index: source/test/cintltst/citertst.h

-===================================================================

---- source/test/cintltst/citertst.h (revision 75773)

-+++ source/test/cintltst/citertst.h (working copy)

-@@ -1,6 +1,6 @@

- /********************************************************************

- * COPYRIGHT:

- ********************************************************************/

- /********************************************************************************

-@@ -101,6 +101,11 @@

- * Bound checkings.

- */

- static void TestSortKeyValidity(void);

-+/**

-+* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with

-+* normalization on AND jamo tailoring, among other things.

-+*/

-+static void TestSearchCollatorElements(void);

- /*------------------------------------------------------------------------

- Internal utilities

-Index: source/i18n/ucol.cpp

-===================================================================

---- source/i18n/ucol.cpp (revision 75773)

-+++ source/i18n/ucol.cpp (working copy)

-@@ -1,6 +1,6 @@

- /*

- *******************************************************************************

- * file name: ucol.cpp

-@@ -1444,173 +1444,176 @@

- UChar ch = 0;

- collationSource->offsetReturn = NULL;

-- for (;;) /* Loop handles case when incremental normalize switches */

-- { /* to or from the side buffer / original string, and we */

-- /* need to start again to get the next character. */

-+ do {

-+ for (;;) /* Loop handles case when incremental normalize switches */

-+ { /* to or from the side buffer / original string, and we */

-+ /* need to start again to get the next character. */

-- if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)

-- {

-- // The source string is null terminated and we're not working from the side buffer,

-- // and we're not normalizing. This is the fast path.

-- // (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.)

-- ch = *collationSource->pos++;

-- if (ch != 0) {

-- break;

-+ if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)

-+ {

-+ // The source string is null terminated and we're not working from the side buffer,

-+ // and we're not normalizing. This is the fast path.

-+ // (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.)

-+ ch = *collationSource->pos++;

-+ if (ch != 0) {

-+ break;

-+ }

-+ else {

-+ return UCOL_NO_MORE_CES;

-+ }

- }

-- else {

-- return UCOL_NO_MORE_CES;

-- }

-- if (collationSource->flags & UCOL_ITER_HASLEN) {

-- // Normal path for strings when length is specified.

-- // (We can't be in side buffer because it is always null terminated.)

-- if (collationSource->pos >= collationSource->endp) {

-- // Ran off of the end of the main source string. We're done.

-- return UCOL_NO_MORE_CES;

-+ if (collationSource->flags & UCOL_ITER_HASLEN) {

-+ // Normal path for strings when length is specified.

-+ // (We can't be in side buffer because it is always null terminated.)

-+ if (collationSource->pos >= collationSource->endp) {

-+ // Ran off of the end of the main source string. We're done.

-+ return UCOL_NO_MORE_CES;

-+ }

-+ ch = *collationSource->pos++;

- }

-- ch = *collationSource->pos++;

-- }

-- else if(collationSource->flags & UCOL_USE_ITERATOR) {

-- UChar32 iterCh = collationSource->iterator->next(collationSource->iterator);

-- if(iterCh == U_SENTINEL) {

-- return UCOL_NO_MORE_CES;

-- }

-- ch = (UChar)iterCh;

-- }

-- else

-- {

-- // Null terminated string.

-- ch = *collationSource->pos++;

-- if (ch == 0) {

-- // Ran off end of buffer.

-- if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {

-- // Ran off end of main string. backing up one character.

-- collationSource->pos--;

-+ else if(collationSource->flags & UCOL_USE_ITERATOR) {

-+ UChar32 iterCh = collationSource->iterator->next(collationSource->iterator);

-+ if(iterCh == U_SENTINEL) {

- return UCOL_NO_MORE_CES;

- }

-- else

-- {

-- // Hit null in the normalize side buffer.

-- // Usually this means the end of the normalized data,

-- // except for one odd case: a null followed by combining chars,

-- // which is the case if we are at the start of the buffer.

-- if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) {

-- break;

-+ ch = (UChar)iterCh;

-+ }

-+ else

-+ {

-+ // Null terminated string.

-+ ch = *collationSource->pos++;

-+ if (ch == 0) {

-+ // Ran off end of buffer.

-+ if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {

-+ // Ran off end of main string. backing up one character.

-+ collationSource->pos--;

-+ return UCOL_NO_MORE_CES;

- }

-+ else

-+ {

-+ // Hit null in the normalize side buffer.

-+ // Usually this means the end of the normalized data,

-+ // except for one odd case: a null followed by combining chars,

-+ // which is the case if we are at the start of the buffer.

-+ if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) {

-+ break;

-+ }

-- // Null marked end of side buffer.

-- // Revert to the main string and

-- // loop back to top to try again to get a character.

-- collationSource->pos = collationSource->fcdPosition;

-- collationSource->flags = collationSource->origFlags;

-- continue;

-+ // Null marked end of side buffer.

-+ // Revert to the main string and

-+ // loop back to top to try again to get a character.

-+ collationSource->pos = collationSource->fcdPosition;

-+ collationSource->flags = collationSource->origFlags;

-+ continue;

-+ }

- }

-- }

-- if(collationSource->flags&UCOL_HIRAGANA_Q) {

-- /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag

-- * based on whether the previous codepoint was Hiragana or Katakana.

-- */

-- if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) ||

-- ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {

-- collationSource->flags |= UCOL_WAS_HIRAGANA;

-- } else {

-- collationSource->flags &= ~UCOL_WAS_HIRAGANA;

-+ if(collationSource->flags&UCOL_HIRAGANA_Q) {

-+ /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag

-+ * based on whether the previous codepoint was Hiragana or Katakana.

-+ */

-+ if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) ||

-+ ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {

-+ collationSource->flags |= UCOL_WAS_HIRAGANA;

-+ } else {

-+ collationSource->flags &= ~UCOL_WAS_HIRAGANA;

-+ }

- }

-- }

-- // We've got a character. See if there's any fcd and/or normalization stuff to do.

-- // Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer.

-- if ((collationSource->flags & UCOL_ITER_NORM) == 0) {

-- break;

-- }

-+ // We've got a character. See if there's any fcd and/or normalization stuff to do.

-+ // Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer.

-+ if ((collationSource->flags & UCOL_ITER_NORM) == 0) {

-+ break;

-+ }

-- if (collationSource->fcdPosition >= collationSource->pos) {

-- // An earlier FCD check has already covered the current character.

-- // We can go ahead and process this char.

-- break;

-- }

-- if (ch < ZERO_CC_LIMIT_ ) {

-- // Fast fcd safe path. Trailing combining class == 0. This char is OK.

-- break;

-- }

-- if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {

-- // We need to peek at the next character in order to tell if we are FCD

-- if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) {

-- // We are at the last char of source string.

-- // It is always OK for FCD check.

-+ if (collationSource->fcdPosition >= collationSource->pos) {

-+ // An earlier FCD check has already covered the current character.

-+ // We can go ahead and process this char.

- break;

- }

-- // Not at last char of source string (or we'll check against terminating null). Do the FCD fast test

-- if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {

-+ if (ch < ZERO_CC_LIMIT_ ) {

-+ // Fast fcd safe path. Trailing combining class == 0. This char is OK.

- break;

- }

-- }

-+ if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {

-+ // We need to peek at the next character in order to tell if we are FCD

-+ if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) {

-+ // We are at the last char of source string.

-+ // It is always OK for FCD check.

-+ break;

-+ }

-- // Need a more complete FCD check and possible normalization.

-- if (collIterFCD(collationSource)) {

-- collIterNormalize(collationSource);

-- }

-- if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {

-- // No normalization was needed. Go ahead and process the char we already had.

-- break;

-- }

-+ // Not at last char of source string (or we'll check against terminating null). Do the FCD fast test

-+ if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {

-+ break;

-+ }

-- // Some normalization happened. Next loop iteration will pick up a char

-- // from the normalization buffer.

-- } // end for (;;)

-+ // Need a more complete FCD check and possible normalization.

-+ if (collIterFCD(collationSource)) {

-+ collIterNormalize(collationSource);

-+ }

-+ if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {

-+ // No normalization was needed. Go ahead and process the char we already had.

-+ break;

-+ }

-+ // Some normalization happened. Next loop iteration will pick up a char

-+ // from the normalization buffer.

-- if (ch <= 0xFF) {

-- /* For latin-1 characters we never need to fall back to the UCA table */

-- /* because all of the UCA data is replicated in the latinOneMapping array */

-- order = coll->latinOneMapping[ch];

-- if (order > UCOL_NOT_FOUND) {

-- order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);

-+ } // end for (;;)

-+ if (ch <= 0xFF) {

-+ /* For latin-1 characters we never need to fall back to the UCA table */

-+ /* because all of the UCA data is replicated in the latinOneMapping array */

-+ order = coll->latinOneMapping[ch];

-+ if (order > UCOL_NOT_FOUND) {

-+ order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);

-+ }

- }

-- }

-- else

-- {

-- // Always use UCA for Han, Hangul

-- // (Han extension A is before main Han block)

-- // **** Han compatibility chars ?? ****

-- if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&

-- (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {

-- if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {

-- // between the two target ranges; do normal lookup

-- // **** this range is YI, Modifier tone letters, ****

-- // **** Latin-D, Syloti Nagari, Phagas-pa. ****

-- // **** Latin-D might be tailored, so we need to ****

-- // **** do the normal lookup for these guys. ****

-+ else

-+ {

-+ // Always use UCA for Han, Hangul

-+ // (Han extension A is before main Han block)

-+ // **** Han compatibility chars ?? ****

-+ if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&

-+ (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {

-+ if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {

-+ // between the two target ranges; do normal lookup

-+ // **** this range is YI, Modifier tone letters, ****

-+ // **** Latin-D, Syloti Nagari, Phagas-pa. ****

-+ // **** Latin-D might be tailored, so we need to ****

-+ // **** do the normal lookup for these guys. ****

-+ order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

-+ } else {

-+ // in one of the target ranges; use UCA

-+ order = UCOL_NOT_FOUND;

-+ }

-+ } else {

- order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

-- } else {

-- // in one of the target ranges; use UCA

-- order = UCOL_NOT_FOUND;

- }

-- } else {

-- order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

-- }

-- if(order > UCOL_NOT_FOUND) { /* if a CE is special */

-- order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */

-- }

-+ if(order > UCOL_NOT_FOUND) { /* if a CE is special */

-+ order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */

-+ }

-- if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */

-- /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */

-- order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);

-+ if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */

-+ /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */

-+ order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);

-- if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */

-- order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);

-+ if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */

-+ order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);

-+ }

- }

-- }

-+ } while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );

- if(order == UCOL_NOT_FOUND) {

- order = getImplicit(ch, collationSource);

- }

-@@ -1958,161 +1961,163 @@

- else {

- UChar ch = 0;

-- /*

-- Loop handles case when incremental normalize switches to or from the

-- side buffer / original string, and we need to start again to get the

-- next character.

-- */

-- for (;;) {

-- if (data->flags & UCOL_ITER_HASLEN) {

-- /*

-- Normal path for strings when length is specified.

-- Not in side buffer because it is always null terminated.

-- */

-- if (data->pos <= data->string) {

-- /* End of the main source string */

-- return UCOL_NO_MORE_CES;

-- }

-- data->pos --;

-- ch = *data->pos;

-- }

-- // we are using an iterator to go back. Pray for us!

-- else if (data->flags & UCOL_USE_ITERATOR) {

-- UChar32 iterCh = data->iterator->previous(data->iterator);

-- if(iterCh == U_SENTINEL) {

-- return UCOL_NO_MORE_CES;

-- } else {

-- ch = (UChar)iterCh;

-- }

-- else {

-- data->pos --;

-- ch = *data->pos;

-- /* we are in the side buffer. */

-- if (ch == 0) {

-+ do {

-+ /*

-+ Loop handles case when incremental normalize switches to or from the

-+ side buffer / original string, and we need to start again to get the

-+ next character.

-+ */

-+ for (;;) {

-+ if (data->flags & UCOL_ITER_HASLEN) {

- /*

-- At the start of the normalize side buffer.

-- Go back to string.

-- Because pointer points to the last accessed character,

-- hence we have to increment it by one here.

-+ Normal path for strings when length is specified.

-+ Not in side buffer because it is always null terminated.

- */

-- data->flags = data->origFlags;

-- data->offsetRepeatValue = 0;

-- if (data->fcdPosition == NULL) {

-- data->pos = data->string;

-+ if (data->pos <= data->string) {

-+ /* End of the main source string */

- return UCOL_NO_MORE_CES;

- }

-- else {

-- data->pos = data->fcdPosition + 1;

-+ data->pos --;

-+ ch = *data->pos;

-+ }

-+ // we are using an iterator to go back. Pray for us!

-+ else if (data->flags & UCOL_USE_ITERATOR) {

-+ UChar32 iterCh = data->iterator->previous(data->iterator);

-+ if(iterCh == U_SENTINEL) {

-+ return UCOL_NO_MORE_CES;

-+ } else {

-+ ch = (UChar)iterCh;

-+ }

-+ else {

-+ data->pos --;

-+ ch = *data->pos;

-+ /* we are in the side buffer. */

-+ if (ch == 0) {

-+ /*

-+ At the start of the normalize side buffer.

-+ Go back to string.

-+ Because pointer points to the last accessed character,

-+ hence we have to increment it by one here.

-+ */

-+ data->flags = data->origFlags;

-+ data->offsetRepeatValue = 0;

-+ if (data->fcdPosition == NULL) {

-+ data->pos = data->string;

-+ return UCOL_NO_MORE_CES;

-+ }

-+ else {

-+ data->pos = data->fcdPosition + 1;

-+ }

-+ continue;

- }

-- continue;

- }

-- }

-- if(data->flags&UCOL_HIRAGANA_Q) {

-- if(ch>=0x3040 && ch<=0x309f) {

-- data->flags |= UCOL_WAS_HIRAGANA;

-- } else {

-- data->flags &= ~UCOL_WAS_HIRAGANA;

-- }

-+ if(data->flags&UCOL_HIRAGANA_Q) {

-+ if(ch>=0x3040 && ch<=0x309f) {

-+ data->flags |= UCOL_WAS_HIRAGANA;

-+ } else {

-+ data->flags &= ~UCOL_WAS_HIRAGANA;

-+ }

-- /*

-- * got a character to determine if there's fcd and/or normalization

-- * stuff to do.

-- * if the current character is not fcd.

-- * if current character is at the start of the string

-- * Trailing combining class == 0.

-- * Note if pos is in the writablebuffer, norm is always 0

-- */

-- if (ch < ZERO_CC_LIMIT_ ||

-- // this should propel us out of the loop in the iterator case

-- (data->flags & UCOL_ITER_NORM) == 0 ||

-- (data->fcdPosition != NULL && data->fcdPosition <= data->pos)

-- || data->string == data->pos) {

-- break;

-- }

-- if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {

-- /* if next character is FCD */

-- if (data->pos == data->string) {

-- /* First char of string is always OK for FCD check */

-+ /*

-+ * got a character to determine if there's fcd and/or normalization

-+ * stuff to do.

-+ * if the current character is not fcd.

-+ * if current character is at the start of the string

-+ * Trailing combining class == 0.

-+ * Note if pos is in the writablebuffer, norm is always 0

-+ */

-+ if (ch < ZERO_CC_LIMIT_ ||

-+ // this should propel us out of the loop in the iterator case

-+ (data->flags & UCOL_ITER_NORM) == 0 ||

-+ (data->fcdPosition != NULL && data->fcdPosition <= data->pos)

-+ || data->string == data->pos) {

- break;

- }

-- /* Not first char of string, do the FCD fast test */

-- if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {

-+ if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {

-+ /* if next character is FCD */

-+ if (data->pos == data->string) {

-+ /* First char of string is always OK for FCD check */

-+ break;

-+ }

-+ /* Not first char of string, do the FCD fast test */

-+ if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {

-+ break;

-+ }

-+ /* Need a more complete FCD check and possible normalization. */

-+ if (collPrevIterFCD(data)) {

-+ collPrevIterNormalize(data);

-+ }

-+ if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {

-+ /* No normalization. Go ahead and process the char. */

- break;

- }

-- }

-- /* Need a more complete FCD check and possible normalization. */

-- if (collPrevIterFCD(data)) {

-- collPrevIterNormalize(data);

-+ /*

-+ Some normalization happened.

-+ Next loop picks up a char from the normalization buffer.

-+ */

- }

-- if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {

-- /* No normalization. Go ahead and process the char. */

-- break;

-- }

-- /*

-- Some normalization happened.

-- Next loop picks up a char from the normalization buffer.

-+ /* attempt to handle contractions, after removal of the backwards

-+ contraction

- */

-- }

-- /* attempt to handle contractions, after removal of the backwards

-- contraction

-- */

-- if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {

-- result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);

-- } else {

-- if (ch <= 0xFF) {

-- result = coll->latinOneMapping[ch];

-- }

-- else {

-- // Always use UCA for [3400..9FFF], [AC00..D7AF]

-- // **** [FA0E..FA2F] ?? ****

-- if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&

-- (ch >= 0x3400 && ch <= 0xD7AF)) {

-- if (ch > 0x9FFF && ch < 0xAC00) {

-- // between the two target ranges; do normal lookup

-- // **** this range is YI, Modifier tone letters, ****

-- // **** Latin-D, Syloti Nagari, Phagas-pa. ****

-- // **** Latin-D might be tailored, so we need to ****

-- // **** do the normal lookup for these guys. ****

-- result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

-+ if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {

-+ result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);

-+ } else {

-+ if (ch <= 0xFF) {

-+ result = coll->latinOneMapping[ch];

-+ }

-+ else {

-+ // Always use UCA for [3400..9FFF], [AC00..D7AF]

-+ // **** [FA0E..FA2F] ?? ****

-+ if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&

-+ (ch >= 0x3400 && ch <= 0xD7AF)) {

-+ if (ch > 0x9FFF && ch < 0xAC00) {

-+ // between the two target ranges; do normal lookup

-+ // **** this range is YI, Modifier tone letters, ****

-+ // **** Latin-D, Syloti Nagari, Phagas-pa. ****

-+ // **** Latin-D might be tailored, so we need to ****

-+ // **** do the normal lookup for these guys. ****

-+ result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

-+ } else {

-+ result = UCOL_NOT_FOUND;

-+ }

- } else {

-- result = UCOL_NOT_FOUND;

-+ result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

- }

-- } else {

-- result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

- }

-- }

-- if (result > UCOL_NOT_FOUND) {

-- result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);

-- }

-- if (result == UCOL_NOT_FOUND) { // Not found in master list

-- if (!isAtStartPrevIterate(data) &&

-- ucol_contractionEndCP(ch, data->coll))

-- {

-- result = UCOL_CONTRACTION;

-- } else {

-- if(coll->UCA) {

-- result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);

-+ if (result > UCOL_NOT_FOUND) {

-+ result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);

-+ }

-+ if (result == UCOL_NOT_FOUND) { // Not found in master list

-+ if (!isAtStartPrevIterate(data) &&

-+ ucol_contractionEndCP(ch, data->coll))

-+ {

-+ result = UCOL_CONTRACTION;

-+ } else {

-+ if(coll->UCA) {

-+ result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);

-+ }

- }

-- }

-- if (result > UCOL_NOT_FOUND) {

-- if(coll->UCA) {

-- result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);

-+ if (result > UCOL_NOT_FOUND) {

-+ if(coll->UCA) {

-+ result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);

-+ }

- }

-- }

-+ } while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );

- if(result == UCOL_NOT_FOUND) {

- result = getPrevImplicit(ch, data);

-@@ -3193,6 +3198,7 @@

- // Since Hanguls pass the FCD check, it is

- // guaranteed that we won't be in

- // the normalization buffer if something like this happens

- // However, if we are using a uchar iterator and normalization

- // is ON, the Hangul that lead us here is going to be in that

- // normalization buffer. Here we want to restore the uchar

-@@ -3201,6 +3207,7 @@

- source->flags = source->origFlags; // restore the iterator

- source->pos = NULL;

- }

- // Move Jamos into normalization buffer

- UChar *buffer = source->writableBuffer.getBuffer(4);

- int32_t bufferLength;

-@@ -3214,8 +3221,9 @@

- }

- source->writableBuffer.releaseBuffer(bufferLength);

-- source->fcdPosition = source->pos; // Indicate where to continue in main input string

-- // after exhausting the writableBuffer

-+ // Indicate where to continue in main input string after exhausting the writableBuffer

-+ source->fcdPosition = source->pos;

- source->pos = source->writableBuffer.getTerminatedBuffer();

- source->origFlags = source->flags;

- source->flags |= UCOL_ITER_INNORMBUF;

-@@ -3966,13 +3974,10 @@

- // Since Hanguls pass the FCD check, it is

- // guaranteed that we won't be in

- // the normalization buffer if something like this happens

- // Move Jamos into normalization buffer

-- /*

-- Move the Jamos into the

-- normalization buffer

-- */

- UChar *tempbuffer = source->writableBuffer.getBuffer(5);

-- int32_t tempbufferLength;

-+ int32_t tempbufferLength, jamoOffset;

- tempbuffer[0] = 0;

- tempbuffer[1] = (UChar)L;

- tempbuffer[2] = (UChar)V;

-@@ -3984,16 +3989,30 @@

- }

- source->writableBuffer.releaseBuffer(tempbufferLength);

-- /*

-- Indicate where to continue in main input string after exhausting

-- the writableBuffer

-- */

-+ // Indicate where to continue in main input string after exhausting the writableBuffer

- if (source->pos == source->string) {

-+ jamoOffset = 0;

- source->fcdPosition = NULL;

- } else {

-+ jamoOffset = source->pos - source->string;

- source->fcdPosition = source->pos-1;

- }

-+ // Append offsets for the additional chars

-+ // (not the 0, and not the L whose offsets match the original Hangul)

-+ int32_t jamoRemaining = tempbufferLength - 2;

-+ jamoOffset++; // appended offsets should match end of original Hangul

-+ while (jamoRemaining-- > 0) {

-+ source->appendOffset(jamoOffset, *status);

-+ }

-+ source->offsetRepeatValue = jamoOffset;

-+ source->offsetReturn = source->offsetStore - 1;

-+ if (source->offsetReturn == source->offsetBuffer) {

-+ source->offsetStore = source->offsetBuffer;

-+ }

- source->pos = source->writableBuffer.getTerminatedBuffer() + tempbufferLength;

- source->origFlags = source->flags;

- source->flags |= UCOL_ITER_INNORMBUF;

« no previous file with comments | « icu52/patches/rtti.patch ('k') | icu52/patches/segmentation.patch » ('j') | no next file with comments »