Index: icu52/patches/search_collation.patch |
=================================================================== |
--- icu52/patches/search_collation.patch (revision 261238) |
+++ icu52/patches/search_collation.patch (working copy) |
@@ -1,1083 +0,0 @@ |
-Index: source/test/cintltst/usrchtst.c |
-=================================================================== |
---- source/test/cintltst/usrchtst.c (revision 75773) |
-+++ source/test/cintltst/usrchtst.c (working copy) |
-@@ -1,5 +1,5 @@ |
- /******************************************************************** |
-- * Copyright (c) 2001-2010 International Business Machines |
-+ * Copyright (c) 2001-2011 International Business Machines |
- * Corporation and others. All Rights Reserved. |
- ******************************************************************** |
- * File usrchtst.c |
-@@ -2553,7 +2553,173 @@ |
- ucol_close(coll); |
- } |
- |
-+/** |
-+* TestUsingSearchCollator |
-+*/ |
- |
-+#define ARRAY_LENGTH(array) (sizeof(array)/sizeof(array[0])) |
-+ |
-+typedef struct { |
-+ const UChar * pattern; |
-+ const int32_t * offsets; |
-+ int32_t offsetsLen; |
-+} PatternAndOffsets; |
-+ |
-+static const UChar scKoText[] = { |
-+ 0x0020, |
-+/*01*/ 0xAC00, 0x0020, /* simple LV Hangul */ |
-+/*03*/ 0xAC01, 0x0020, /* simple LVT Hangul */ |
-+/*05*/ 0xAC0F, 0x0020, /* LVTT, last jamo expands for search */ |
-+/*07*/ 0xAFFF, 0x0020, /* LLVVVTT, every jamo expands for search */ |
-+/*09*/ 0x1100, 0x1161, 0x11A8, 0x0020, /* 0xAC01 as conjoining jamo */ |
-+/*13*/ 0x1100, 0x1161, 0x1100, 0x0020, /* 0xAC01 as basic conjoining jamo (per search rules) */ |
-+/*17*/ 0x3131, 0x314F, 0x3131, 0x0020, /* 0xAC01 as compatibility jamo */ |
-+/*21*/ 0x1100, 0x1161, 0x11B6, 0x0020, /* 0xAC0F as conjoining jamo; last expands for search */ |
-+/*25*/ 0x1100, 0x1161, 0x1105, 0x1112, 0x0020, /* 0xAC0F as basic conjoining jamo; last expands for search */ |
-+/*30*/ 0x1101, 0x1170, 0x11B6, 0x0020, /* 0xAFFF as conjoining jamo; all expand for search */ |
-+/*34*/ 0x00E6, 0x0020, /* small letter ae, expands */ |
-+/*36*/ 0x1E4D, 0x0020, /* small letter o with tilde and acute, decomposes */ |
-+ 0 |
-+}; |
-+ |
-+static const UChar scKoPat0[] = { 0xAC01, 0 }; |
-+static const UChar scKoPat1[] = { 0x1100, 0x1161, 0x11A8, 0 }; /* 0xAC01 as conjoining jamo */ |
-+static const UChar scKoPat2[] = { 0xAC0F, 0 }; |
-+static const UChar scKoPat3[] = { 0x1100, 0x1161, 0x1105, 0x1112, 0 }; /* 0xAC0F as basic conjoining jamo */ |
-+static const UChar scKoPat4[] = { 0xAFFF, 0 }; |
-+static const UChar scKoPat5[] = { 0x1101, 0x1170, 0x11B6, 0 }; /* 0xAFFF as conjoining jamo */ |
-+ |
-+static const int32_t scKoSrchOff01[] = { 3, 9, 13 }; |
-+static const int32_t scKoSrchOff23[] = { 5, 21, 25 }; |
-+static const int32_t scKoSrchOff45[] = { 7, 30 }; |
-+ |
-+static const PatternAndOffsets scKoSrchPatternsOffsets[] = { |
-+ { scKoPat0, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) }, |
-+ { scKoPat1, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) }, |
-+ { scKoPat2, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) }, |
-+ { scKoPat3, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) }, |
-+ { scKoPat4, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) }, |
-+ { scKoPat5, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) }, |
-+ { NULL, NULL, 0 } |
-+}; |
-+ |
-+static const int32_t scKoStndOff01[] = { 3, 9 }; |
-+static const int32_t scKoStndOff2[] = { 5, 21 }; |
-+static const int32_t scKoStndOff3[] = { 25 }; |
-+static const int32_t scKoStndOff45[] = { 7, 30 }; |
-+ |
-+static const PatternAndOffsets scKoStndPatternsOffsets[] = { |
-+ { scKoPat0, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) }, |
-+ { scKoPat1, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) }, |
-+ { scKoPat2, scKoStndOff2, ARRAY_LENGTH(scKoStndOff2) }, |
-+ { scKoPat3, scKoStndOff3, ARRAY_LENGTH(scKoStndOff3) }, |
-+ { scKoPat4, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) }, |
-+ { scKoPat5, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) }, |
-+ { NULL, NULL, 0 } |
-+}; |
-+ |
-+typedef struct { |
-+ const char * locale; |
-+ const UChar * text; |
-+ const PatternAndOffsets * patternsAndOffsets; |
-+} TUSCItem; |
-+ |
-+static const TUSCItem tuscItems[] = { |
-+ { "root", scKoText, scKoStndPatternsOffsets }, |
-+ { "root@collation=search", scKoText, scKoSrchPatternsOffsets }, |
-+ { "ko@collation=search", scKoText, scKoSrchPatternsOffsets }, |
-+ { NULL, NULL, NULL } |
-+}; |
-+ |
-+static const UChar dummyPat[] = { 0x0061, 0 }; |
-+ |
-+static void TestUsingSearchCollator(void) |
-+{ |
-+ const TUSCItem * tuscItemPtr; |
-+ for (tuscItemPtr = tuscItems; tuscItemPtr->locale != NULL; tuscItemPtr++) { |
-+ UErrorCode status = U_ZERO_ERROR; |
-+ UCollator* ucol = ucol_open(tuscItemPtr->locale, &status); |
-+ if ( U_SUCCESS(status) ) { |
-+ UStringSearch* usrch = usearch_openFromCollator(dummyPat, -1, tuscItemPtr->text, -1, ucol, NULL, &status); |
-+ if ( U_SUCCESS(status) ) { |
-+ const PatternAndOffsets * patternsOffsetsPtr; |
-+ for ( patternsOffsetsPtr = tuscItemPtr->patternsAndOffsets; patternsOffsetsPtr->pattern != NULL; patternsOffsetsPtr++) { |
-+ usearch_setPattern(usrch, patternsOffsetsPtr->pattern, -1, &status); |
-+ if ( U_SUCCESS(status) ) { |
-+ int32_t offset; |
-+ const int32_t * nextOffsetPtr; |
-+ const int32_t * limitOffsetPtr; |
-+ |
-+ usearch_reset(usrch); |
-+ nextOffsetPtr = patternsOffsetsPtr->offsets; |
-+ limitOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen; |
-+ while (TRUE) { |
-+ offset = usearch_next(usrch, &status); |
-+ if ( U_FAILURE(status) || offset == USEARCH_DONE ) { |
-+ break; |
-+ } |
-+ if ( nextOffsetPtr < limitOffsetPtr ) { |
-+ if (offset != *nextOffsetPtr) { |
-+ log_err("error, locale %s, expected usearch_next %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset); |
-+ nextOffsetPtr = limitOffsetPtr; |
-+ break; |
-+ } |
-+ nextOffsetPtr++; |
-+ } else { |
-+ log_err("error, locale %s, usearch_next returned more matches than expected\n", tuscItemPtr->locale ); |
-+ } |
-+ } |
-+ if ( U_FAILURE(status) ) { |
-+ log_err("error, locale %s, usearch_next failed: %s\n", tuscItemPtr->locale, u_errorName(status) ); |
-+ } else if ( nextOffsetPtr < limitOffsetPtr ) { |
-+ log_err("error, locale %s, usearch_next returned fewer matches than expected\n", tuscItemPtr->locale ); |
-+ } |
-+ |
-+ status = U_ZERO_ERROR; |
-+ usearch_reset(usrch); |
-+ nextOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen; |
-+ limitOffsetPtr = patternsOffsetsPtr->offsets; |
-+ while (TRUE) { |
-+ offset = usearch_previous(usrch, &status); |
-+ if ( U_FAILURE(status) || offset == USEARCH_DONE ) { |
-+ break; |
-+ } |
-+ if ( nextOffsetPtr > limitOffsetPtr ) { |
-+ nextOffsetPtr--; |
-+ if (offset != *nextOffsetPtr) { |
-+ log_err("error, locale %s, expected usearch_previous %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset); |
-+ nextOffsetPtr = limitOffsetPtr; |
-+ break; |
-+ } |
-+ } else { |
-+ log_err("error, locale %s, usearch_previous returned more matches than expected\n", tuscItemPtr->locale ); |
-+ } |
-+ } |
-+ if ( U_FAILURE(status) ) { |
-+ log_err("error, locale %s, usearch_previous failed: %s\n", tuscItemPtr->locale, u_errorName(status) ); |
-+ } else if ( nextOffsetPtr > limitOffsetPtr ) { |
-+ log_err("error, locale %s, usearch_previous returned fewer matches than expected\n", tuscItemPtr->locale ); |
-+ } |
-+ |
-+ } else { |
-+ log_err("error, locale %s, usearch_setPattern failed: %s\n", tuscItemPtr->locale, u_errorName(status) ); |
-+ } |
-+ } |
-+ usearch_close(usrch); |
-+ } else { |
-+ log_err("error, locale %s, usearch_openFromCollator failed: %s\n", tuscItemPtr->locale, u_errorName(status) ); |
-+ } |
-+ ucol_close(ucol); |
-+ } else { |
-+ log_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr->locale, u_errorName(status) ); |
-+ } |
-+ } |
-+} |
-+ |
-+/** |
-+* addSearchTest |
-+*/ |
-+ |
- void addSearchTest(TestNode** root) |
- { |
- addTest(root, &TestStart, "tscoll/usrchtst/TestStart"); |
-@@ -2608,6 +2774,7 @@ |
- addTest(root, &TestForwardBackward, "tscoll/usrchtst/TestForwardBackward"); |
- addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull"); |
- addTest(root, &TestStrengthIdentical, "tscoll/usrchtst/TestStrengthIdentical"); |
-+ addTest(root, &TestUsingSearchCollator, "tscoll/usrchtst/TestUsingSearchCollator"); |
- } |
- |
- #endif /* #if !UCONFIG_NO_COLLATION */ |
-Index: source/test/cintltst/citertst.c |
-=================================================================== |
---- source/test/cintltst/citertst.c (revision 75773) |
-+++ source/test/cintltst/citertst.c (working copy) |
-@@ -1,6 +1,6 @@ |
- /******************************************************************** |
- * COPYRIGHT: |
-- * Copyright (c) 1997-2010, International Business Machines Corporation and |
-+ * Copyright (c) 1997-2011, International Business Machines Corporation and |
- * others. All Rights Reserved. |
- ********************************************************************/ |
- /******************************************************************************** |
-@@ -22,6 +22,7 @@ |
- #if !UCONFIG_NO_COLLATION |
- |
- #include "unicode/ucol.h" |
-+#include "unicode/ucoleitr.h" |
- #include "unicode/uloc.h" |
- #include "unicode/uchar.h" |
- #include "unicode/ustring.h" |
-@@ -58,6 +59,7 @@ |
- addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow"); |
- addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity"); |
- addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity"); |
-+ addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements"); |
- } |
- |
- /* The locales we support */ |
-@@ -2017,4 +2019,141 @@ |
- T_FileStream_close(file); |
- } |
- |
-+/** |
-+* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with |
-+* normalization on AND jamo tailoring, among other things. |
-+*/ |
-+static const UChar tsceText[] = { /* Nothing in here should be ignorable */ |
-+ 0x0020, 0xAC00, /* simple LV Hangul */ |
-+ 0x0020, 0xAC01, /* simple LVT Hangul */ |
-+ 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */ |
-+ 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */ |
-+ 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */ |
-+ 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */ |
-+ 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */ |
-+ 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */ |
-+ 0x0020, 0x00E6, /* small letter ae, expands */ |
-+ 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */ |
-+ 0x0020 |
-+}; |
-+enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) }; |
-+ |
-+static const int32_t rootStandardOffsets[] = { |
-+ 0, 1,2, |
-+ 2, 3,4,4, |
-+ 4, 5,6,6, |
-+ 6, 7,8,8, |
-+ 8, 9,10,11, |
-+ 12, 13,14,15, |
-+ 16, 17,18,19, |
-+ 20, 21,22,23, |
-+ 24, 25,26,26,26, |
-+ 26, 27,28,28, |
-+ 28, |
-+ 29 |
-+}; |
-+enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) }; |
-+ |
-+static const int32_t rootSearchOffsets[] = { |
-+ 0, 1,2, |
-+ 2, 3,4,4, |
-+ 4, 5,6,6,6, |
-+ 6, 7,8,8,8,8,8,8, |
-+ 8, 9,10,11, |
-+ 12, 13,14,15, |
-+ 16, 17,18,19,20, |
-+ 20, 21,22,22,23,23,23,24, |
-+ 24, 25,26,26,26, |
-+ 26, 27,28,28, |
-+ 28, |
-+ 29 |
-+}; |
-+enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) }; |
-+ |
-+typedef struct { |
-+ const char * locale; |
-+ const int32_t * offsets; |
-+ int32_t offsetsLen; |
-+} TSCEItem; |
-+ |
-+static const TSCEItem tsceItems[] = { |
-+ { "root", rootStandardOffsets, kLen_rootStandardOffsets }, |
-+ { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets }, |
-+ { NULL, NULL, 0 } |
-+}; |
-+ |
-+static void TestSearchCollatorElements(void) |
-+{ |
-+ const TSCEItem * tsceItemPtr; |
-+ for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) { |
-+ UErrorCode status = U_ZERO_ERROR; |
-+ UCollator* ucol = ucol_open(tsceItemPtr->locale, &status); |
-+ if ( U_SUCCESS(status) ) { |
-+ UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status); |
-+ if ( U_SUCCESS(status) ) { |
-+ int32_t offset, element; |
-+ const int32_t * nextOffsetPtr; |
-+ const int32_t * limitOffsetPtr; |
-+ |
-+ nextOffsetPtr = tsceItemPtr->offsets; |
-+ limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; |
-+ do { |
-+ offset = ucol_getOffset(uce); |
-+ element = ucol_next(uce, &status); |
-+ if ( element == 0 ) { |
-+ log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale ); |
-+ } |
-+ if ( nextOffsetPtr < limitOffsetPtr ) { |
-+ if (offset != *nextOffsetPtr) { |
-+ log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n", |
-+ tsceItemPtr->locale, *nextOffsetPtr, offset ); |
-+ nextOffsetPtr = limitOffsetPtr; |
-+ break; |
-+ } |
-+ nextOffsetPtr++; |
-+ } else { |
-+ log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale ); |
-+ } |
-+ } while ( U_SUCCESS(status) && element != UCOL_NULLORDER ); |
-+ if ( nextOffsetPtr < limitOffsetPtr ) { |
-+ log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale ); |
-+ } |
-+ |
-+ ucol_setOffset(uce, kLen_tsceText, &status); |
-+ status = U_ZERO_ERROR; |
-+ nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; |
-+ limitOffsetPtr = tsceItemPtr->offsets; |
-+ do { |
-+ offset = ucol_getOffset(uce); |
-+ element = ucol_previous(uce, &status); |
-+ if ( element == 0 ) { |
-+ log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale ); |
-+ } |
-+ if ( nextOffsetPtr > limitOffsetPtr ) { |
-+ nextOffsetPtr--; |
-+ if (offset != *nextOffsetPtr) { |
-+ log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n", |
-+ tsceItemPtr->locale, *nextOffsetPtr, offset ); |
-+ nextOffsetPtr = limitOffsetPtr; |
-+ break; |
-+ } |
-+ } else { |
-+ log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale ); |
-+ } |
-+ } while ( U_SUCCESS(status) && element != UCOL_NULLORDER ); |
-+ if ( nextOffsetPtr > limitOffsetPtr ) { |
-+ log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale ); |
-+ } |
-+ |
-+ ucol_closeElements(uce); |
-+ } else { |
-+ log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) ); |
-+ } |
-+ ucol_close(ucol); |
-+ } else { |
-+ log_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) ); |
-+ } |
-+ } |
-+} |
-+ |
- #endif /* #if !UCONFIG_NO_COLLATION */ |
-Index: source/test/cintltst/citertst.h |
-=================================================================== |
---- source/test/cintltst/citertst.h (revision 75773) |
-+++ source/test/cintltst/citertst.h (working copy) |
-@@ -1,6 +1,6 @@ |
- /******************************************************************** |
- * COPYRIGHT: |
-- * Copyright (c) 1997-2008, International Business Machines Corporation and |
-+ * Copyright (c) 1997-2008,2011, International Business Machines Corporation and |
- * others. All Rights Reserved. |
- ********************************************************************/ |
- /******************************************************************************** |
-@@ -101,6 +101,11 @@ |
- * Bound checkings. |
- */ |
- static void TestSortKeyValidity(void); |
-+/** |
-+* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with |
-+* normalization on AND jamo tailoring, among other things. |
-+*/ |
-+static void TestSearchCollatorElements(void); |
- |
- /*------------------------------------------------------------------------ |
- Internal utilities |
-Index: source/i18n/ucol.cpp |
-=================================================================== |
---- source/i18n/ucol.cpp (revision 75773) |
-+++ source/i18n/ucol.cpp (working copy) |
-@@ -1,6 +1,6 @@ |
- /* |
- ******************************************************************************* |
--* Copyright (C) 1996-2010, International Business Machines |
-+* Copyright (C) 1996-2011, International Business Machines |
- * Corporation and others. All Rights Reserved. |
- ******************************************************************************* |
- * file name: ucol.cpp |
-@@ -1444,173 +1444,176 @@ |
- UChar ch = 0; |
- collationSource->offsetReturn = NULL; |
- |
-- for (;;) /* Loop handles case when incremental normalize switches */ |
-- { /* to or from the side buffer / original string, and we */ |
-- /* need to start again to get the next character. */ |
-+ do { |
-+ for (;;) /* Loop handles case when incremental normalize switches */ |
-+ { /* to or from the side buffer / original string, and we */ |
-+ /* need to start again to get the next character. */ |
- |
-- if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0) |
-- { |
-- // The source string is null terminated and we're not working from the side buffer, |
-- // and we're not normalizing. This is the fast path. |
-- // (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.) |
-- ch = *collationSource->pos++; |
-- if (ch != 0) { |
-- break; |
-+ if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0) |
-+ { |
-+ // The source string is null terminated and we're not working from the side buffer, |
-+ // and we're not normalizing. This is the fast path. |
-+ // (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.) |
-+ ch = *collationSource->pos++; |
-+ if (ch != 0) { |
-+ break; |
-+ } |
-+ else { |
-+ return UCOL_NO_MORE_CES; |
-+ } |
- } |
-- else { |
-- return UCOL_NO_MORE_CES; |
-- } |
-- } |
- |
-- if (collationSource->flags & UCOL_ITER_HASLEN) { |
-- // Normal path for strings when length is specified. |
-- // (We can't be in side buffer because it is always null terminated.) |
-- if (collationSource->pos >= collationSource->endp) { |
-- // Ran off of the end of the main source string. We're done. |
-- return UCOL_NO_MORE_CES; |
-+ if (collationSource->flags & UCOL_ITER_HASLEN) { |
-+ // Normal path for strings when length is specified. |
-+ // (We can't be in side buffer because it is always null terminated.) |
-+ if (collationSource->pos >= collationSource->endp) { |
-+ // Ran off of the end of the main source string. We're done. |
-+ return UCOL_NO_MORE_CES; |
-+ } |
-+ ch = *collationSource->pos++; |
- } |
-- ch = *collationSource->pos++; |
-- } |
-- else if(collationSource->flags & UCOL_USE_ITERATOR) { |
-- UChar32 iterCh = collationSource->iterator->next(collationSource->iterator); |
-- if(iterCh == U_SENTINEL) { |
-- return UCOL_NO_MORE_CES; |
-- } |
-- ch = (UChar)iterCh; |
-- } |
-- else |
-- { |
-- // Null terminated string. |
-- ch = *collationSource->pos++; |
-- if (ch == 0) { |
-- // Ran off end of buffer. |
-- if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { |
-- // Ran off end of main string. backing up one character. |
-- collationSource->pos--; |
-+ else if(collationSource->flags & UCOL_USE_ITERATOR) { |
-+ UChar32 iterCh = collationSource->iterator->next(collationSource->iterator); |
-+ if(iterCh == U_SENTINEL) { |
- return UCOL_NO_MORE_CES; |
- } |
-- else |
-- { |
-- // Hit null in the normalize side buffer. |
-- // Usually this means the end of the normalized data, |
-- // except for one odd case: a null followed by combining chars, |
-- // which is the case if we are at the start of the buffer. |
-- if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) { |
-- break; |
-+ ch = (UChar)iterCh; |
-+ } |
-+ else |
-+ { |
-+ // Null terminated string. |
-+ ch = *collationSource->pos++; |
-+ if (ch == 0) { |
-+ // Ran off end of buffer. |
-+ if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { |
-+ // Ran off end of main string. backing up one character. |
-+ collationSource->pos--; |
-+ return UCOL_NO_MORE_CES; |
- } |
-+ else |
-+ { |
-+ // Hit null in the normalize side buffer. |
-+ // Usually this means the end of the normalized data, |
-+ // except for one odd case: a null followed by combining chars, |
-+ // which is the case if we are at the start of the buffer. |
-+ if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) { |
-+ break; |
-+ } |
- |
-- // Null marked end of side buffer. |
-- // Revert to the main string and |
-- // loop back to top to try again to get a character. |
-- collationSource->pos = collationSource->fcdPosition; |
-- collationSource->flags = collationSource->origFlags; |
-- continue; |
-+ // Null marked end of side buffer. |
-+ // Revert to the main string and |
-+ // loop back to top to try again to get a character. |
-+ collationSource->pos = collationSource->fcdPosition; |
-+ collationSource->flags = collationSource->origFlags; |
-+ continue; |
-+ } |
- } |
- } |
-- } |
- |
-- if(collationSource->flags&UCOL_HIRAGANA_Q) { |
-- /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag |
-- * based on whether the previous codepoint was Hiragana or Katakana. |
-- */ |
-- if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) || |
-- ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) { |
-- collationSource->flags |= UCOL_WAS_HIRAGANA; |
-- } else { |
-- collationSource->flags &= ~UCOL_WAS_HIRAGANA; |
-+ if(collationSource->flags&UCOL_HIRAGANA_Q) { |
-+ /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag |
-+ * based on whether the previous codepoint was Hiragana or Katakana. |
-+ */ |
-+ if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) || |
-+ ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) { |
-+ collationSource->flags |= UCOL_WAS_HIRAGANA; |
-+ } else { |
-+ collationSource->flags &= ~UCOL_WAS_HIRAGANA; |
-+ } |
- } |
-- } |
- |
-- // We've got a character. See if there's any fcd and/or normalization stuff to do. |
-- // Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer. |
-- if ((collationSource->flags & UCOL_ITER_NORM) == 0) { |
-- break; |
-- } |
-+ // We've got a character. See if there's any fcd and/or normalization stuff to do. |
-+ // Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer. |
-+ if ((collationSource->flags & UCOL_ITER_NORM) == 0) { |
-+ break; |
-+ } |
- |
-- if (collationSource->fcdPosition >= collationSource->pos) { |
-- // An earlier FCD check has already covered the current character. |
-- // We can go ahead and process this char. |
-- break; |
-- } |
-- |
-- if (ch < ZERO_CC_LIMIT_ ) { |
-- // Fast fcd safe path. Trailing combining class == 0. This char is OK. |
-- break; |
-- } |
-- |
-- if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { |
-- // We need to peek at the next character in order to tell if we are FCD |
-- if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) { |
-- // We are at the last char of source string. |
-- // It is always OK for FCD check. |
-+ if (collationSource->fcdPosition >= collationSource->pos) { |
-+ // An earlier FCD check has already covered the current character. |
-+ // We can go ahead and process this char. |
- break; |
- } |
- |
-- // Not at last char of source string (or we'll check against terminating null). Do the FCD fast test |
-- if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) { |
-+ if (ch < ZERO_CC_LIMIT_ ) { |
-+ // Fast fcd safe path. Trailing combining class == 0. This char is OK. |
- break; |
- } |
-- } |
- |
-+ if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { |
-+ // We need to peek at the next character in order to tell if we are FCD |
-+ if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) { |
-+ // We are at the last char of source string. |
-+ // It is always OK for FCD check. |
-+ break; |
-+ } |
- |
-- // Need a more complete FCD check and possible normalization. |
-- if (collIterFCD(collationSource)) { |
-- collIterNormalize(collationSource); |
-- } |
-- if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { |
-- // No normalization was needed. Go ahead and process the char we already had. |
-- break; |
-- } |
-+ // Not at last char of source string (or we'll check against terminating null). Do the FCD fast test |
-+ if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) { |
-+ break; |
-+ } |
-+ } |
- |
-- // Some normalization happened. Next loop iteration will pick up a char |
-- // from the normalization buffer. |
- |
-- } // end for (;;) |
-+ // Need a more complete FCD check and possible normalization. |
-+ if (collIterFCD(collationSource)) { |
-+ collIterNormalize(collationSource); |
-+ } |
-+ if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { |
-+ // No normalization was needed. Go ahead and process the char we already had. |
-+ break; |
-+ } |
- |
-+ // Some normalization happened. Next loop iteration will pick up a char |
-+ // from the normalization buffer. |
- |
-- if (ch <= 0xFF) { |
-- /* For latin-1 characters we never need to fall back to the UCA table */ |
-- /* because all of the UCA data is replicated in the latinOneMapping array */ |
-- order = coll->latinOneMapping[ch]; |
-- if (order > UCOL_NOT_FOUND) { |
-- order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); |
-+ } // end for (;;) |
-+ |
-+ |
-+ if (ch <= 0xFF) { |
-+ /* For latin-1 characters we never need to fall back to the UCA table */ |
-+ /* because all of the UCA data is replicated in the latinOneMapping array */ |
-+ order = coll->latinOneMapping[ch]; |
-+ if (order > UCOL_NOT_FOUND) { |
-+ order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); |
-+ } |
- } |
-- } |
-- else |
-- { |
-- // Always use UCA for Han, Hangul |
-- // (Han extension A is before main Han block) |
-- // **** Han compatibility chars ?? **** |
-- if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && |
-- (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) { |
-- if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) { |
-- // between the two target ranges; do normal lookup |
-- // **** this range is YI, Modifier tone letters, **** |
-- // **** Latin-D, Syloti Nagari, Phagas-pa. **** |
-- // **** Latin-D might be tailored, so we need to **** |
-- // **** do the normal lookup for these guys. **** |
-+ else |
-+ { |
-+ // Always use UCA for Han, Hangul |
-+ // (Han extension A is before main Han block) |
-+ // **** Han compatibility chars ?? **** |
-+ if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && |
-+ (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) { |
-+ if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) { |
-+ // between the two target ranges; do normal lookup |
-+ // **** this range is YI, Modifier tone letters, **** |
-+ // **** Latin-D, Syloti Nagari, Phagas-pa. **** |
-+ // **** Latin-D might be tailored, so we need to **** |
-+ // **** do the normal lookup for these guys. **** |
-+ order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); |
-+ } else { |
-+ // in one of the target ranges; use UCA |
-+ order = UCOL_NOT_FOUND; |
-+ } |
-+ } else { |
- order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); |
-- } else { |
-- // in one of the target ranges; use UCA |
-- order = UCOL_NOT_FOUND; |
- } |
-- } else { |
-- order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); |
-- } |
- |
-- if(order > UCOL_NOT_FOUND) { /* if a CE is special */ |
-- order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */ |
-- } |
-+ if(order > UCOL_NOT_FOUND) { /* if a CE is special */ |
-+ order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */ |
-+ } |
- |
-- if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */ |
-- /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */ |
-- order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); |
-+ if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */ |
-+ /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */ |
-+ order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); |
- |
-- if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */ |
-- order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status); |
-+ if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */ |
-+ order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status); |
-+ } |
- } |
- } |
-- } |
-+ } while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL ); |
-+ |
- if(order == UCOL_NOT_FOUND) { |
- order = getImplicit(ch, collationSource); |
- } |
-@@ -1958,161 +1961,163 @@ |
- else { |
- UChar ch = 0; |
- |
-- /* |
-- Loop handles case when incremental normalize switches to or from the |
-- side buffer / original string, and we need to start again to get the |
-- next character. |
-- */ |
-- for (;;) { |
-- if (data->flags & UCOL_ITER_HASLEN) { |
-- /* |
-- Normal path for strings when length is specified. |
-- Not in side buffer because it is always null terminated. |
-- */ |
-- if (data->pos <= data->string) { |
-- /* End of the main source string */ |
-- return UCOL_NO_MORE_CES; |
-- } |
-- data->pos --; |
-- ch = *data->pos; |
-- } |
-- // we are using an iterator to go back. Pray for us! |
-- else if (data->flags & UCOL_USE_ITERATOR) { |
-- UChar32 iterCh = data->iterator->previous(data->iterator); |
-- if(iterCh == U_SENTINEL) { |
-- return UCOL_NO_MORE_CES; |
-- } else { |
-- ch = (UChar)iterCh; |
-- } |
-- } |
-- else { |
-- data->pos --; |
-- ch = *data->pos; |
-- /* we are in the side buffer. */ |
-- if (ch == 0) { |
-+ do { |
-+ /* |
-+ Loop handles case when incremental normalize switches to or from the |
-+ side buffer / original string, and we need to start again to get the |
-+ next character. |
-+ */ |
-+ for (;;) { |
-+ if (data->flags & UCOL_ITER_HASLEN) { |
- /* |
-- At the start of the normalize side buffer. |
-- Go back to string. |
-- Because pointer points to the last accessed character, |
-- hence we have to increment it by one here. |
-+ Normal path for strings when length is specified. |
-+ Not in side buffer because it is always null terminated. |
- */ |
-- data->flags = data->origFlags; |
-- data->offsetRepeatValue = 0; |
-- |
-- if (data->fcdPosition == NULL) { |
-- data->pos = data->string; |
-+ if (data->pos <= data->string) { |
-+ /* End of the main source string */ |
- return UCOL_NO_MORE_CES; |
- } |
-- else { |
-- data->pos = data->fcdPosition + 1; |
-+ data->pos --; |
-+ ch = *data->pos; |
-+ } |
-+ // we are using an iterator to go back. Pray for us! |
-+ else if (data->flags & UCOL_USE_ITERATOR) { |
-+ UChar32 iterCh = data->iterator->previous(data->iterator); |
-+ if(iterCh == U_SENTINEL) { |
-+ return UCOL_NO_MORE_CES; |
-+ } else { |
-+ ch = (UChar)iterCh; |
-+ } |
-+ } |
-+ else { |
-+ data->pos --; |
-+ ch = *data->pos; |
-+ /* we are in the side buffer. */ |
-+ if (ch == 0) { |
-+ /* |
-+ At the start of the normalize side buffer. |
-+ Go back to string. |
-+ Because pointer points to the last accessed character, |
-+ hence we have to increment it by one here. |
-+ */ |
-+ data->flags = data->origFlags; |
-+ data->offsetRepeatValue = 0; |
-+ |
-+ if (data->fcdPosition == NULL) { |
-+ data->pos = data->string; |
-+ return UCOL_NO_MORE_CES; |
-+ } |
-+ else { |
-+ data->pos = data->fcdPosition + 1; |
-+ } |
-+ |
-+ continue; |
- } |
-- |
-- continue; |
- } |
-- } |
- |
-- if(data->flags&UCOL_HIRAGANA_Q) { |
-- if(ch>=0x3040 && ch<=0x309f) { |
-- data->flags |= UCOL_WAS_HIRAGANA; |
-- } else { |
-- data->flags &= ~UCOL_WAS_HIRAGANA; |
-- } |
-- } |
-+ if(data->flags&UCOL_HIRAGANA_Q) { |
-+ if(ch>=0x3040 && ch<=0x309f) { |
-+ data->flags |= UCOL_WAS_HIRAGANA; |
-+ } else { |
-+ data->flags &= ~UCOL_WAS_HIRAGANA; |
-+ } |
-+ } |
- |
-- /* |
-- * got a character to determine if there's fcd and/or normalization |
-- * stuff to do. |
-- * if the current character is not fcd. |
-- * if current character is at the start of the string |
-- * Trailing combining class == 0. |
-- * Note if pos is in the writablebuffer, norm is always 0 |
-- */ |
-- if (ch < ZERO_CC_LIMIT_ || |
-- // this should propel us out of the loop in the iterator case |
-- (data->flags & UCOL_ITER_NORM) == 0 || |
-- (data->fcdPosition != NULL && data->fcdPosition <= data->pos) |
-- || data->string == data->pos) { |
-- break; |
-- } |
-- |
-- if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { |
-- /* if next character is FCD */ |
-- if (data->pos == data->string) { |
-- /* First char of string is always OK for FCD check */ |
-+ /* |
-+ * got a character to determine if there's fcd and/or normalization |
-+ * stuff to do. |
-+ * if the current character is not fcd. |
-+ * if current character is at the start of the string |
-+ * Trailing combining class == 0. |
-+ * Note if pos is in the writablebuffer, norm is always 0 |
-+ */ |
-+ if (ch < ZERO_CC_LIMIT_ || |
-+ // this should propel us out of the loop in the iterator case |
-+ (data->flags & UCOL_ITER_NORM) == 0 || |
-+ (data->fcdPosition != NULL && data->fcdPosition <= data->pos) |
-+ || data->string == data->pos) { |
- break; |
- } |
- |
-- /* Not first char of string, do the FCD fast test */ |
-- if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) { |
-+ if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { |
-+ /* if next character is FCD */ |
-+ if (data->pos == data->string) { |
-+ /* First char of string is always OK for FCD check */ |
-+ break; |
-+ } |
-+ |
-+ /* Not first char of string, do the FCD fast test */ |
-+ if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) { |
-+ break; |
-+ } |
-+ } |
-+ |
-+ /* Need a more complete FCD check and possible normalization. */ |
-+ if (collPrevIterFCD(data)) { |
-+ collPrevIterNormalize(data); |
-+ } |
-+ |
-+ if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { |
-+ /* No normalization. Go ahead and process the char. */ |
- break; |
- } |
-- } |
- |
-- /* Need a more complete FCD check and possible normalization. */ |
-- if (collPrevIterFCD(data)) { |
-- collPrevIterNormalize(data); |
-+ /* |
-+ Some normalization happened. |
-+ Next loop picks up a char from the normalization buffer. |
-+ */ |
- } |
- |
-- if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { |
-- /* No normalization. Go ahead and process the char. */ |
-- break; |
-- } |
-- |
-- /* |
-- Some normalization happened. |
-- Next loop picks up a char from the normalization buffer. |
-+ /* attempt to handle contractions, after removal of the backwards |
-+ contraction |
- */ |
-- } |
-- |
-- /* attempt to handle contractions, after removal of the backwards |
-- contraction |
-- */ |
-- if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) { |
-- result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status); |
-- } else { |
-- if (ch <= 0xFF) { |
-- result = coll->latinOneMapping[ch]; |
-- } |
-- else { |
-- // Always use UCA for [3400..9FFF], [AC00..D7AF] |
-- // **** [FA0E..FA2F] ?? **** |
-- if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && |
-- (ch >= 0x3400 && ch <= 0xD7AF)) { |
-- if (ch > 0x9FFF && ch < 0xAC00) { |
-- // between the two target ranges; do normal lookup |
-- // **** this range is YI, Modifier tone letters, **** |
-- // **** Latin-D, Syloti Nagari, Phagas-pa. **** |
-- // **** Latin-D might be tailored, so we need to **** |
-- // **** do the normal lookup for these guys. **** |
-- result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); |
-+ if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) { |
-+ result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status); |
-+ } else { |
-+ if (ch <= 0xFF) { |
-+ result = coll->latinOneMapping[ch]; |
-+ } |
-+ else { |
-+ // Always use UCA for [3400..9FFF], [AC00..D7AF] |
-+ // **** [FA0E..FA2F] ?? **** |
-+ if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && |
-+ (ch >= 0x3400 && ch <= 0xD7AF)) { |
-+ if (ch > 0x9FFF && ch < 0xAC00) { |
-+ // between the two target ranges; do normal lookup |
-+ // **** this range is YI, Modifier tone letters, **** |
-+ // **** Latin-D, Syloti Nagari, Phagas-pa. **** |
-+ // **** Latin-D might be tailored, so we need to **** |
-+ // **** do the normal lookup for these guys. **** |
-+ result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); |
-+ } else { |
-+ result = UCOL_NOT_FOUND; |
-+ } |
- } else { |
-- result = UCOL_NOT_FOUND; |
-+ result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); |
- } |
-- } else { |
-- result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); |
- } |
-- } |
-- if (result > UCOL_NOT_FOUND) { |
-- result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status); |
-- } |
-- if (result == UCOL_NOT_FOUND) { // Not found in master list |
-- if (!isAtStartPrevIterate(data) && |
-- ucol_contractionEndCP(ch, data->coll)) |
-- { |
-- result = UCOL_CONTRACTION; |
-- } else { |
-- if(coll->UCA) { |
-- result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); |
-+ if (result > UCOL_NOT_FOUND) { |
-+ result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status); |
-+ } |
-+ if (result == UCOL_NOT_FOUND) { // Not found in master list |
-+ if (!isAtStartPrevIterate(data) && |
-+ ucol_contractionEndCP(ch, data->coll)) |
-+ { |
-+ result = UCOL_CONTRACTION; |
-+ } else { |
-+ if(coll->UCA) { |
-+ result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); |
-+ } |
- } |
-- } |
- |
-- if (result > UCOL_NOT_FOUND) { |
-- if(coll->UCA) { |
-- result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status); |
-+ if (result > UCOL_NOT_FOUND) { |
-+ if(coll->UCA) { |
-+ result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status); |
-+ } |
- } |
- } |
- } |
-- } |
-+ } while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL ); |
- |
- if(result == UCOL_NOT_FOUND) { |
- result = getPrevImplicit(ch, data); |
-@@ -3193,6 +3198,7 @@ |
- // Since Hanguls pass the FCD check, it is |
- // guaranteed that we won't be in |
- // the normalization buffer if something like this happens |
-+ |
- // However, if we are using a uchar iterator and normalization |
- // is ON, the Hangul that lead us here is going to be in that |
- // normalization buffer. Here we want to restore the uchar |
-@@ -3201,6 +3207,7 @@ |
- source->flags = source->origFlags; // restore the iterator |
- source->pos = NULL; |
- } |
-+ |
- // Move Jamos into normalization buffer |
- UChar *buffer = source->writableBuffer.getBuffer(4); |
- int32_t bufferLength; |
-@@ -3214,8 +3221,9 @@ |
- } |
- source->writableBuffer.releaseBuffer(bufferLength); |
- |
-- source->fcdPosition = source->pos; // Indicate where to continue in main input string |
-- // after exhausting the writableBuffer |
-+ // Indicate where to continue in main input string after exhausting the writableBuffer |
-+ source->fcdPosition = source->pos; |
-+ |
- source->pos = source->writableBuffer.getTerminatedBuffer(); |
- source->origFlags = source->flags; |
- source->flags |= UCOL_ITER_INNORMBUF; |
-@@ -3966,13 +3974,10 @@ |
- // Since Hanguls pass the FCD check, it is |
- // guaranteed that we won't be in |
- // the normalization buffer if something like this happens |
-+ |
- // Move Jamos into normalization buffer |
-- /* |
-- Move the Jamos into the |
-- normalization buffer |
-- */ |
- UChar *tempbuffer = source->writableBuffer.getBuffer(5); |
-- int32_t tempbufferLength; |
-+ int32_t tempbufferLength, jamoOffset; |
- tempbuffer[0] = 0; |
- tempbuffer[1] = (UChar)L; |
- tempbuffer[2] = (UChar)V; |
-@@ -3984,16 +3989,30 @@ |
- } |
- source->writableBuffer.releaseBuffer(tempbufferLength); |
- |
-- /* |
-- Indicate where to continue in main input string after exhausting |
-- the writableBuffer |
-- */ |
-+ // Indicate where to continue in main input string after exhausting the writableBuffer |
- if (source->pos == source->string) { |
-+ jamoOffset = 0; |
- source->fcdPosition = NULL; |
- } else { |
-+ jamoOffset = source->pos - source->string; |
- source->fcdPosition = source->pos-1; |
- } |
-+ |
-+ // Append offsets for the additional chars |
-+ // (not the 0, and not the L whose offsets match the original Hangul) |
-+ int32_t jamoRemaining = tempbufferLength - 2; |
-+ jamoOffset++; // appended offsets should match end of original Hangul |
-+ while (jamoRemaining-- > 0) { |
-+ source->appendOffset(jamoOffset, *status); |
-+ } |
- |
-+ source->offsetRepeatValue = jamoOffset; |
-+ |
-+ source->offsetReturn = source->offsetStore - 1; |
-+ if (source->offsetReturn == source->offsetBuffer) { |
-+ source->offsetStore = source->offsetBuffer; |
-+ } |
-+ |
- source->pos = source->writableBuffer.getTerminatedBuffer() + tempbufferLength; |
- source->origFlags = source->flags; |
- source->flags |= UCOL_ITER_INNORMBUF; |