Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(252)

Unified Diff: icu52/patches/search_collation.patch

Issue 224943002: icu local change part1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/
Patch Set: function indentation changed Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « icu52/patches/rtti.patch ('k') | icu52/patches/segmentation.patch » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: icu52/patches/search_collation.patch
===================================================================
--- icu52/patches/search_collation.patch (revision 261238)
+++ icu52/patches/search_collation.patch (working copy)
@@ -1,1083 +0,0 @@
-Index: source/test/cintltst/usrchtst.c
-===================================================================
---- source/test/cintltst/usrchtst.c (revision 75773)
-+++ source/test/cintltst/usrchtst.c (working copy)
-@@ -1,5 +1,5 @@
- /********************************************************************
-- * Copyright (c) 2001-2010 International Business Machines
-+ * Copyright (c) 2001-2011 International Business Machines
- * Corporation and others. All Rights Reserved.
- ********************************************************************
- * File usrchtst.c
-@@ -2553,7 +2553,173 @@
- ucol_close(coll);
- }
-
-+/**
-+* TestUsingSearchCollator
-+*/
-
-+#define ARRAY_LENGTH(array) (sizeof(array)/sizeof(array[0]))
-+
-+typedef struct {
-+ const UChar * pattern;
-+ const int32_t * offsets;
-+ int32_t offsetsLen;
-+} PatternAndOffsets;
-+
-+static const UChar scKoText[] = {
-+ 0x0020,
-+/*01*/ 0xAC00, 0x0020, /* simple LV Hangul */
-+/*03*/ 0xAC01, 0x0020, /* simple LVT Hangul */
-+/*05*/ 0xAC0F, 0x0020, /* LVTT, last jamo expands for search */
-+/*07*/ 0xAFFF, 0x0020, /* LLVVVTT, every jamo expands for search */
-+/*09*/ 0x1100, 0x1161, 0x11A8, 0x0020, /* 0xAC01 as conjoining jamo */
-+/*13*/ 0x1100, 0x1161, 0x1100, 0x0020, /* 0xAC01 as basic conjoining jamo (per search rules) */
-+/*17*/ 0x3131, 0x314F, 0x3131, 0x0020, /* 0xAC01 as compatibility jamo */
-+/*21*/ 0x1100, 0x1161, 0x11B6, 0x0020, /* 0xAC0F as conjoining jamo; last expands for search */
-+/*25*/ 0x1100, 0x1161, 0x1105, 0x1112, 0x0020, /* 0xAC0F as basic conjoining jamo; last expands for search */
-+/*30*/ 0x1101, 0x1170, 0x11B6, 0x0020, /* 0xAFFF as conjoining jamo; all expand for search */
-+/*34*/ 0x00E6, 0x0020, /* small letter ae, expands */
-+/*36*/ 0x1E4D, 0x0020, /* small letter o with tilde and acute, decomposes */
-+ 0
-+};
-+
-+static const UChar scKoPat0[] = { 0xAC01, 0 };
-+static const UChar scKoPat1[] = { 0x1100, 0x1161, 0x11A8, 0 }; /* 0xAC01 as conjoining jamo */
-+static const UChar scKoPat2[] = { 0xAC0F, 0 };
-+static const UChar scKoPat3[] = { 0x1100, 0x1161, 0x1105, 0x1112, 0 }; /* 0xAC0F as basic conjoining jamo */
-+static const UChar scKoPat4[] = { 0xAFFF, 0 };
-+static const UChar scKoPat5[] = { 0x1101, 0x1170, 0x11B6, 0 }; /* 0xAFFF as conjoining jamo */
-+
-+static const int32_t scKoSrchOff01[] = { 3, 9, 13 };
-+static const int32_t scKoSrchOff23[] = { 5, 21, 25 };
-+static const int32_t scKoSrchOff45[] = { 7, 30 };
-+
-+static const PatternAndOffsets scKoSrchPatternsOffsets[] = {
-+ { scKoPat0, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },
-+ { scKoPat1, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },
-+ { scKoPat2, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },
-+ { scKoPat3, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },
-+ { scKoPat4, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },
-+ { scKoPat5, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },
-+ { NULL, NULL, 0 }
-+};
-+
-+static const int32_t scKoStndOff01[] = { 3, 9 };
-+static const int32_t scKoStndOff2[] = { 5, 21 };
-+static const int32_t scKoStndOff3[] = { 25 };
-+static const int32_t scKoStndOff45[] = { 7, 30 };
-+
-+static const PatternAndOffsets scKoStndPatternsOffsets[] = {
-+ { scKoPat0, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },
-+ { scKoPat1, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },
-+ { scKoPat2, scKoStndOff2, ARRAY_LENGTH(scKoStndOff2) },
-+ { scKoPat3, scKoStndOff3, ARRAY_LENGTH(scKoStndOff3) },
-+ { scKoPat4, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },
-+ { scKoPat5, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },
-+ { NULL, NULL, 0 }
-+};
-+
-+typedef struct {
-+ const char * locale;
-+ const UChar * text;
-+ const PatternAndOffsets * patternsAndOffsets;
-+} TUSCItem;
-+
-+static const TUSCItem tuscItems[] = {
-+ { "root", scKoText, scKoStndPatternsOffsets },
-+ { "root@collation=search", scKoText, scKoSrchPatternsOffsets },
-+ { "ko@collation=search", scKoText, scKoSrchPatternsOffsets },
-+ { NULL, NULL, NULL }
-+};
-+
-+static const UChar dummyPat[] = { 0x0061, 0 };
-+
-+static void TestUsingSearchCollator(void)
-+{
-+ const TUSCItem * tuscItemPtr;
-+ for (tuscItemPtr = tuscItems; tuscItemPtr->locale != NULL; tuscItemPtr++) {
-+ UErrorCode status = U_ZERO_ERROR;
-+ UCollator* ucol = ucol_open(tuscItemPtr->locale, &status);
-+ if ( U_SUCCESS(status) ) {
-+ UStringSearch* usrch = usearch_openFromCollator(dummyPat, -1, tuscItemPtr->text, -1, ucol, NULL, &status);
-+ if ( U_SUCCESS(status) ) {
-+ const PatternAndOffsets * patternsOffsetsPtr;
-+ for ( patternsOffsetsPtr = tuscItemPtr->patternsAndOffsets; patternsOffsetsPtr->pattern != NULL; patternsOffsetsPtr++) {
-+ usearch_setPattern(usrch, patternsOffsetsPtr->pattern, -1, &status);
-+ if ( U_SUCCESS(status) ) {
-+ int32_t offset;
-+ const int32_t * nextOffsetPtr;
-+ const int32_t * limitOffsetPtr;
-+
-+ usearch_reset(usrch);
-+ nextOffsetPtr = patternsOffsetsPtr->offsets;
-+ limitOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen;
-+ while (TRUE) {
-+ offset = usearch_next(usrch, &status);
-+ if ( U_FAILURE(status) || offset == USEARCH_DONE ) {
-+ break;
-+ }
-+ if ( nextOffsetPtr < limitOffsetPtr ) {
-+ if (offset != *nextOffsetPtr) {
-+ log_err("error, locale %s, expected usearch_next %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);
-+ nextOffsetPtr = limitOffsetPtr;
-+ break;
-+ }
-+ nextOffsetPtr++;
-+ } else {
-+ log_err("error, locale %s, usearch_next returned more matches than expected\n", tuscItemPtr->locale );
-+ }
-+ }
-+ if ( U_FAILURE(status) ) {
-+ log_err("error, locale %s, usearch_next failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
-+ } else if ( nextOffsetPtr < limitOffsetPtr ) {
-+ log_err("error, locale %s, usearch_next returned fewer matches than expected\n", tuscItemPtr->locale );
-+ }
-+
-+ status = U_ZERO_ERROR;
-+ usearch_reset(usrch);
-+ nextOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen;
-+ limitOffsetPtr = patternsOffsetsPtr->offsets;
-+ while (TRUE) {
-+ offset = usearch_previous(usrch, &status);
-+ if ( U_FAILURE(status) || offset == USEARCH_DONE ) {
-+ break;
-+ }
-+ if ( nextOffsetPtr > limitOffsetPtr ) {
-+ nextOffsetPtr--;
-+ if (offset != *nextOffsetPtr) {
-+ log_err("error, locale %s, expected usearch_previous %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);
-+ nextOffsetPtr = limitOffsetPtr;
-+ break;
-+ }
-+ } else {
-+ log_err("error, locale %s, usearch_previous returned more matches than expected\n", tuscItemPtr->locale );
-+ }
-+ }
-+ if ( U_FAILURE(status) ) {
-+ log_err("error, locale %s, usearch_previous failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
-+ } else if ( nextOffsetPtr > limitOffsetPtr ) {
-+ log_err("error, locale %s, usearch_previous returned fewer matches than expected\n", tuscItemPtr->locale );
-+ }
-+
-+ } else {
-+ log_err("error, locale %s, usearch_setPattern failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
-+ }
-+ }
-+ usearch_close(usrch);
-+ } else {
-+ log_err("error, locale %s, usearch_openFromCollator failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
-+ }
-+ ucol_close(ucol);
-+ } else {
-+ log_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
-+ }
-+ }
-+}
-+
-+/**
-+* addSearchTest
-+*/
-+
- void addSearchTest(TestNode** root)
- {
- addTest(root, &TestStart, "tscoll/usrchtst/TestStart");
-@@ -2608,6 +2774,7 @@
- addTest(root, &TestForwardBackward, "tscoll/usrchtst/TestForwardBackward");
- addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull");
- addTest(root, &TestStrengthIdentical, "tscoll/usrchtst/TestStrengthIdentical");
-+ addTest(root, &TestUsingSearchCollator, "tscoll/usrchtst/TestUsingSearchCollator");
- }
-
- #endif /* #if !UCONFIG_NO_COLLATION */
-Index: source/test/cintltst/citertst.c
-===================================================================
---- source/test/cintltst/citertst.c (revision 75773)
-+++ source/test/cintltst/citertst.c (working copy)
-@@ -1,6 +1,6 @@
- /********************************************************************
- * COPYRIGHT:
-- * Copyright (c) 1997-2010, International Business Machines Corporation and
-+ * Copyright (c) 1997-2011, International Business Machines Corporation and
- * others. All Rights Reserved.
- ********************************************************************/
- /********************************************************************************
-@@ -22,6 +22,7 @@
- #if !UCONFIG_NO_COLLATION
-
- #include "unicode/ucol.h"
-+#include "unicode/ucoleitr.h"
- #include "unicode/uloc.h"
- #include "unicode/uchar.h"
- #include "unicode/ustring.h"
-@@ -58,6 +59,7 @@
- addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
- addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
- addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
-+ addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
- }
-
- /* The locales we support */
-@@ -2017,4 +2019,141 @@
- T_FileStream_close(file);
- }
-
-+/**
-+* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
-+* normalization on AND jamo tailoring, among other things.
-+*/
-+static const UChar tsceText[] = { /* Nothing in here should be ignorable */
-+ 0x0020, 0xAC00, /* simple LV Hangul */
-+ 0x0020, 0xAC01, /* simple LVT Hangul */
-+ 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
-+ 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
-+ 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
-+ 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
-+ 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
-+ 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
-+ 0x0020, 0x00E6, /* small letter ae, expands */
-+ 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */
-+ 0x0020
-+};
-+enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
-+
-+static const int32_t rootStandardOffsets[] = {
-+ 0, 1,2,
-+ 2, 3,4,4,
-+ 4, 5,6,6,
-+ 6, 7,8,8,
-+ 8, 9,10,11,
-+ 12, 13,14,15,
-+ 16, 17,18,19,
-+ 20, 21,22,23,
-+ 24, 25,26,26,26,
-+ 26, 27,28,28,
-+ 28,
-+ 29
-+};
-+enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) };
-+
-+static const int32_t rootSearchOffsets[] = {
-+ 0, 1,2,
-+ 2, 3,4,4,
-+ 4, 5,6,6,6,
-+ 6, 7,8,8,8,8,8,8,
-+ 8, 9,10,11,
-+ 12, 13,14,15,
-+ 16, 17,18,19,20,
-+ 20, 21,22,22,23,23,23,24,
-+ 24, 25,26,26,26,
-+ 26, 27,28,28,
-+ 28,
-+ 29
-+};
-+enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) };
-+
-+typedef struct {
-+ const char * locale;
-+ const int32_t * offsets;
-+ int32_t offsetsLen;
-+} TSCEItem;
-+
-+static const TSCEItem tsceItems[] = {
-+ { "root", rootStandardOffsets, kLen_rootStandardOffsets },
-+ { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets },
-+ { NULL, NULL, 0 }
-+};
-+
-+static void TestSearchCollatorElements(void)
-+{
-+ const TSCEItem * tsceItemPtr;
-+ for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
-+ UErrorCode status = U_ZERO_ERROR;
-+ UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
-+ if ( U_SUCCESS(status) ) {
-+ UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
-+ if ( U_SUCCESS(status) ) {
-+ int32_t offset, element;
-+ const int32_t * nextOffsetPtr;
-+ const int32_t * limitOffsetPtr;
-+
-+ nextOffsetPtr = tsceItemPtr->offsets;
-+ limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
-+ do {
-+ offset = ucol_getOffset(uce);
-+ element = ucol_next(uce, &status);
-+ if ( element == 0 ) {
-+ log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
-+ }
-+ if ( nextOffsetPtr < limitOffsetPtr ) {
-+ if (offset != *nextOffsetPtr) {
-+ log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
-+ tsceItemPtr->locale, *nextOffsetPtr, offset );
-+ nextOffsetPtr = limitOffsetPtr;
-+ break;
-+ }
-+ nextOffsetPtr++;
-+ } else {
-+ log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
-+ }
-+ } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
-+ if ( nextOffsetPtr < limitOffsetPtr ) {
-+ log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
-+ }
-+
-+ ucol_setOffset(uce, kLen_tsceText, &status);
-+ status = U_ZERO_ERROR;
-+ nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
-+ limitOffsetPtr = tsceItemPtr->offsets;
-+ do {
-+ offset = ucol_getOffset(uce);
-+ element = ucol_previous(uce, &status);
-+ if ( element == 0 ) {
-+ log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
-+ }
-+ if ( nextOffsetPtr > limitOffsetPtr ) {
-+ nextOffsetPtr--;
-+ if (offset != *nextOffsetPtr) {
-+ log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
-+ tsceItemPtr->locale, *nextOffsetPtr, offset );
-+ nextOffsetPtr = limitOffsetPtr;
-+ break;
-+ }
-+ } else {
-+ log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
-+ }
-+ } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
-+ if ( nextOffsetPtr > limitOffsetPtr ) {
-+ log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
-+ }
-+
-+ ucol_closeElements(uce);
-+ } else {
-+ log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
-+ }
-+ ucol_close(ucol);
-+ } else {
-+ log_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
-+ }
-+ }
-+}
-+
- #endif /* #if !UCONFIG_NO_COLLATION */
-Index: source/test/cintltst/citertst.h
-===================================================================
---- source/test/cintltst/citertst.h (revision 75773)
-+++ source/test/cintltst/citertst.h (working copy)
-@@ -1,6 +1,6 @@
- /********************************************************************
- * COPYRIGHT:
-- * Copyright (c) 1997-2008, International Business Machines Corporation and
-+ * Copyright (c) 1997-2008,2011, International Business Machines Corporation and
- * others. All Rights Reserved.
- ********************************************************************/
- /********************************************************************************
-@@ -101,6 +101,11 @@
- * Bound checkings.
- */
- static void TestSortKeyValidity(void);
-+/**
-+* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
-+* normalization on AND jamo tailoring, among other things.
-+*/
-+static void TestSearchCollatorElements(void);
-
- /*------------------------------------------------------------------------
- Internal utilities
-Index: source/i18n/ucol.cpp
-===================================================================
---- source/i18n/ucol.cpp (revision 75773)
-+++ source/i18n/ucol.cpp (working copy)
-@@ -1,6 +1,6 @@
- /*
- *******************************************************************************
--* Copyright (C) 1996-2010, International Business Machines
-+* Copyright (C) 1996-2011, International Business Machines
- * Corporation and others. All Rights Reserved.
- *******************************************************************************
- * file name: ucol.cpp
-@@ -1444,173 +1444,176 @@
- UChar ch = 0;
- collationSource->offsetReturn = NULL;
-
-- for (;;) /* Loop handles case when incremental normalize switches */
-- { /* to or from the side buffer / original string, and we */
-- /* need to start again to get the next character. */
-+ do {
-+ for (;;) /* Loop handles case when incremental normalize switches */
-+ { /* to or from the side buffer / original string, and we */
-+ /* need to start again to get the next character. */
-
-- if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)
-- {
-- // The source string is null terminated and we're not working from the side buffer,
-- // and we're not normalizing. This is the fast path.
-- // (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.)
-- ch = *collationSource->pos++;
-- if (ch != 0) {
-- break;
-+ if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)
-+ {
-+ // The source string is null terminated and we're not working from the side buffer,
-+ // and we're not normalizing. This is the fast path.
-+ // (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.)
-+ ch = *collationSource->pos++;
-+ if (ch != 0) {
-+ break;
-+ }
-+ else {
-+ return UCOL_NO_MORE_CES;
-+ }
- }
-- else {
-- return UCOL_NO_MORE_CES;
-- }
-- }
-
-- if (collationSource->flags & UCOL_ITER_HASLEN) {
-- // Normal path for strings when length is specified.
-- // (We can't be in side buffer because it is always null terminated.)
-- if (collationSource->pos >= collationSource->endp) {
-- // Ran off of the end of the main source string. We're done.
-- return UCOL_NO_MORE_CES;
-+ if (collationSource->flags & UCOL_ITER_HASLEN) {
-+ // Normal path for strings when length is specified.
-+ // (We can't be in side buffer because it is always null terminated.)
-+ if (collationSource->pos >= collationSource->endp) {
-+ // Ran off of the end of the main source string. We're done.
-+ return UCOL_NO_MORE_CES;
-+ }
-+ ch = *collationSource->pos++;
- }
-- ch = *collationSource->pos++;
-- }
-- else if(collationSource->flags & UCOL_USE_ITERATOR) {
-- UChar32 iterCh = collationSource->iterator->next(collationSource->iterator);
-- if(iterCh == U_SENTINEL) {
-- return UCOL_NO_MORE_CES;
-- }
-- ch = (UChar)iterCh;
-- }
-- else
-- {
-- // Null terminated string.
-- ch = *collationSource->pos++;
-- if (ch == 0) {
-- // Ran off end of buffer.
-- if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
-- // Ran off end of main string. backing up one character.
-- collationSource->pos--;
-+ else if(collationSource->flags & UCOL_USE_ITERATOR) {
-+ UChar32 iterCh = collationSource->iterator->next(collationSource->iterator);
-+ if(iterCh == U_SENTINEL) {
- return UCOL_NO_MORE_CES;
- }
-- else
-- {
-- // Hit null in the normalize side buffer.
-- // Usually this means the end of the normalized data,
-- // except for one odd case: a null followed by combining chars,
-- // which is the case if we are at the start of the buffer.
-- if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) {
-- break;
-+ ch = (UChar)iterCh;
-+ }
-+ else
-+ {
-+ // Null terminated string.
-+ ch = *collationSource->pos++;
-+ if (ch == 0) {
-+ // Ran off end of buffer.
-+ if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
-+ // Ran off end of main string. backing up one character.
-+ collationSource->pos--;
-+ return UCOL_NO_MORE_CES;
- }
-+ else
-+ {
-+ // Hit null in the normalize side buffer.
-+ // Usually this means the end of the normalized data,
-+ // except for one odd case: a null followed by combining chars,
-+ // which is the case if we are at the start of the buffer.
-+ if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) {
-+ break;
-+ }
-
-- // Null marked end of side buffer.
-- // Revert to the main string and
-- // loop back to top to try again to get a character.
-- collationSource->pos = collationSource->fcdPosition;
-- collationSource->flags = collationSource->origFlags;
-- continue;
-+ // Null marked end of side buffer.
-+ // Revert to the main string and
-+ // loop back to top to try again to get a character.
-+ collationSource->pos = collationSource->fcdPosition;
-+ collationSource->flags = collationSource->origFlags;
-+ continue;
-+ }
- }
- }
-- }
-
-- if(collationSource->flags&UCOL_HIRAGANA_Q) {
-- /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
-- * based on whether the previous codepoint was Hiragana or Katakana.
-- */
-- if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) ||
-- ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {
-- collationSource->flags |= UCOL_WAS_HIRAGANA;
-- } else {
-- collationSource->flags &= ~UCOL_WAS_HIRAGANA;
-+ if(collationSource->flags&UCOL_HIRAGANA_Q) {
-+ /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
-+ * based on whether the previous codepoint was Hiragana or Katakana.
-+ */
-+ if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) ||
-+ ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {
-+ collationSource->flags |= UCOL_WAS_HIRAGANA;
-+ } else {
-+ collationSource->flags &= ~UCOL_WAS_HIRAGANA;
-+ }
- }
-- }
-
-- // We've got a character. See if there's any fcd and/or normalization stuff to do.
-- // Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer.
-- if ((collationSource->flags & UCOL_ITER_NORM) == 0) {
-- break;
-- }
-+ // We've got a character. See if there's any fcd and/or normalization stuff to do.
-+ // Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer.
-+ if ((collationSource->flags & UCOL_ITER_NORM) == 0) {
-+ break;
-+ }
-
-- if (collationSource->fcdPosition >= collationSource->pos) {
-- // An earlier FCD check has already covered the current character.
-- // We can go ahead and process this char.
-- break;
-- }
--
-- if (ch < ZERO_CC_LIMIT_ ) {
-- // Fast fcd safe path. Trailing combining class == 0. This char is OK.
-- break;
-- }
--
-- if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
-- // We need to peek at the next character in order to tell if we are FCD
-- if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) {
-- // We are at the last char of source string.
-- // It is always OK for FCD check.
-+ if (collationSource->fcdPosition >= collationSource->pos) {
-+ // An earlier FCD check has already covered the current character.
-+ // We can go ahead and process this char.
- break;
- }
-
-- // Not at last char of source string (or we'll check against terminating null). Do the FCD fast test
-- if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {
-+ if (ch < ZERO_CC_LIMIT_ ) {
-+ // Fast fcd safe path. Trailing combining class == 0. This char is OK.
- break;
- }
-- }
-
-+ if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
-+ // We need to peek at the next character in order to tell if we are FCD
-+ if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) {
-+ // We are at the last char of source string.
-+ // It is always OK for FCD check.
-+ break;
-+ }
-
-- // Need a more complete FCD check and possible normalization.
-- if (collIterFCD(collationSource)) {
-- collIterNormalize(collationSource);
-- }
-- if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
-- // No normalization was needed. Go ahead and process the char we already had.
-- break;
-- }
-+ // Not at last char of source string (or we'll check against terminating null). Do the FCD fast test
-+ if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {
-+ break;
-+ }
-+ }
-
-- // Some normalization happened. Next loop iteration will pick up a char
-- // from the normalization buffer.
-
-- } // end for (;;)
-+ // Need a more complete FCD check and possible normalization.
-+ if (collIterFCD(collationSource)) {
-+ collIterNormalize(collationSource);
-+ }
-+ if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
-+ // No normalization was needed. Go ahead and process the char we already had.
-+ break;
-+ }
-
-+ // Some normalization happened. Next loop iteration will pick up a char
-+ // from the normalization buffer.
-
-- if (ch <= 0xFF) {
-- /* For latin-1 characters we never need to fall back to the UCA table */
-- /* because all of the UCA data is replicated in the latinOneMapping array */
-- order = coll->latinOneMapping[ch];
-- if (order > UCOL_NOT_FOUND) {
-- order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);
-+ } // end for (;;)
-+
-+
-+ if (ch <= 0xFF) {
-+ /* For latin-1 characters we never need to fall back to the UCA table */
-+ /* because all of the UCA data is replicated in the latinOneMapping array */
-+ order = coll->latinOneMapping[ch];
-+ if (order > UCOL_NOT_FOUND) {
-+ order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);
-+ }
- }
-- }
-- else
-- {
-- // Always use UCA for Han, Hangul
-- // (Han extension A is before main Han block)
-- // **** Han compatibility chars ?? ****
-- if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
-- (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {
-- if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {
-- // between the two target ranges; do normal lookup
-- // **** this range is YI, Modifier tone letters, ****
-- // **** Latin-D, Syloti Nagari, Phagas-pa. ****
-- // **** Latin-D might be tailored, so we need to ****
-- // **** do the normal lookup for these guys. ****
-+ else
-+ {
-+ // Always use UCA for Han, Hangul
-+ // (Han extension A is before main Han block)
-+ // **** Han compatibility chars ?? ****
-+ if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
-+ (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {
-+ if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {
-+ // between the two target ranges; do normal lookup
-+ // **** this range is YI, Modifier tone letters, ****
-+ // **** Latin-D, Syloti Nagari, Phagas-pa. ****
-+ // **** Latin-D might be tailored, so we need to ****
-+ // **** do the normal lookup for these guys. ****
-+ order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
-+ } else {
-+ // in one of the target ranges; use UCA
-+ order = UCOL_NOT_FOUND;
-+ }
-+ } else {
- order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
-- } else {
-- // in one of the target ranges; use UCA
-- order = UCOL_NOT_FOUND;
- }
-- } else {
-- order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
-- }
-
-- if(order > UCOL_NOT_FOUND) { /* if a CE is special */
-- order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */
-- }
-+ if(order > UCOL_NOT_FOUND) { /* if a CE is special */
-+ order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */
-+ }
-
-- if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */
-- /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
-- order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
-+ if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */
-+ /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
-+ order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
-
-- if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
-- order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);
-+ if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
-+ order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);
-+ }
- }
- }
-- }
-+ } while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );
-+
- if(order == UCOL_NOT_FOUND) {
- order = getImplicit(ch, collationSource);
- }
-@@ -1958,161 +1961,163 @@
- else {
- UChar ch = 0;
-
-- /*
-- Loop handles case when incremental normalize switches to or from the
-- side buffer / original string, and we need to start again to get the
-- next character.
-- */
-- for (;;) {
-- if (data->flags & UCOL_ITER_HASLEN) {
-- /*
-- Normal path for strings when length is specified.
-- Not in side buffer because it is always null terminated.
-- */
-- if (data->pos <= data->string) {
-- /* End of the main source string */
-- return UCOL_NO_MORE_CES;
-- }
-- data->pos --;
-- ch = *data->pos;
-- }
-- // we are using an iterator to go back. Pray for us!
-- else if (data->flags & UCOL_USE_ITERATOR) {
-- UChar32 iterCh = data->iterator->previous(data->iterator);
-- if(iterCh == U_SENTINEL) {
-- return UCOL_NO_MORE_CES;
-- } else {
-- ch = (UChar)iterCh;
-- }
-- }
-- else {
-- data->pos --;
-- ch = *data->pos;
-- /* we are in the side buffer. */
-- if (ch == 0) {
-+ do {
-+ /*
-+ Loop handles case when incremental normalize switches to or from the
-+ side buffer / original string, and we need to start again to get the
-+ next character.
-+ */
-+ for (;;) {
-+ if (data->flags & UCOL_ITER_HASLEN) {
- /*
-- At the start of the normalize side buffer.
-- Go back to string.
-- Because pointer points to the last accessed character,
-- hence we have to increment it by one here.
-+ Normal path for strings when length is specified.
-+ Not in side buffer because it is always null terminated.
- */
-- data->flags = data->origFlags;
-- data->offsetRepeatValue = 0;
--
-- if (data->fcdPosition == NULL) {
-- data->pos = data->string;
-+ if (data->pos <= data->string) {
-+ /* End of the main source string */
- return UCOL_NO_MORE_CES;
- }
-- else {
-- data->pos = data->fcdPosition + 1;
-+ data->pos --;
-+ ch = *data->pos;
-+ }
-+ // we are using an iterator to go back. Pray for us!
-+ else if (data->flags & UCOL_USE_ITERATOR) {
-+ UChar32 iterCh = data->iterator->previous(data->iterator);
-+ if(iterCh == U_SENTINEL) {
-+ return UCOL_NO_MORE_CES;
-+ } else {
-+ ch = (UChar)iterCh;
-+ }
-+ }
-+ else {
-+ data->pos --;
-+ ch = *data->pos;
-+ /* we are in the side buffer. */
-+ if (ch == 0) {
-+ /*
-+ At the start of the normalize side buffer.
-+ Go back to string.
-+ Because pointer points to the last accessed character,
-+ hence we have to increment it by one here.
-+ */
-+ data->flags = data->origFlags;
-+ data->offsetRepeatValue = 0;
-+
-+ if (data->fcdPosition == NULL) {
-+ data->pos = data->string;
-+ return UCOL_NO_MORE_CES;
-+ }
-+ else {
-+ data->pos = data->fcdPosition + 1;
-+ }
-+
-+ continue;
- }
--
-- continue;
- }
-- }
-
-- if(data->flags&UCOL_HIRAGANA_Q) {
-- if(ch>=0x3040 && ch<=0x309f) {
-- data->flags |= UCOL_WAS_HIRAGANA;
-- } else {
-- data->flags &= ~UCOL_WAS_HIRAGANA;
-- }
-- }
-+ if(data->flags&UCOL_HIRAGANA_Q) {
-+ if(ch>=0x3040 && ch<=0x309f) {
-+ data->flags |= UCOL_WAS_HIRAGANA;
-+ } else {
-+ data->flags &= ~UCOL_WAS_HIRAGANA;
-+ }
-+ }
-
-- /*
-- * got a character to determine if there's fcd and/or normalization
-- * stuff to do.
-- * if the current character is not fcd.
-- * if current character is at the start of the string
-- * Trailing combining class == 0.
-- * Note if pos is in the writablebuffer, norm is always 0
-- */
-- if (ch < ZERO_CC_LIMIT_ ||
-- // this should propel us out of the loop in the iterator case
-- (data->flags & UCOL_ITER_NORM) == 0 ||
-- (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
-- || data->string == data->pos) {
-- break;
-- }
--
-- if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
-- /* if next character is FCD */
-- if (data->pos == data->string) {
-- /* First char of string is always OK for FCD check */
-+ /*
-+ * got a character to determine if there's fcd and/or normalization
-+ * stuff to do.
-+ * if the current character is not fcd.
-+ * if current character is at the start of the string
-+ * Trailing combining class == 0.
-+ * Note if pos is in the writablebuffer, norm is always 0
-+ */
-+ if (ch < ZERO_CC_LIMIT_ ||
-+ // this should propel us out of the loop in the iterator case
-+ (data->flags & UCOL_ITER_NORM) == 0 ||
-+ (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
-+ || data->string == data->pos) {
- break;
- }
-
-- /* Not first char of string, do the FCD fast test */
-- if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
-+ if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
-+ /* if next character is FCD */
-+ if (data->pos == data->string) {
-+ /* First char of string is always OK for FCD check */
-+ break;
-+ }
-+
-+ /* Not first char of string, do the FCD fast test */
-+ if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
-+ break;
-+ }
-+ }
-+
-+ /* Need a more complete FCD check and possible normalization. */
-+ if (collPrevIterFCD(data)) {
-+ collPrevIterNormalize(data);
-+ }
-+
-+ if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
-+ /* No normalization. Go ahead and process the char. */
- break;
- }
-- }
-
-- /* Need a more complete FCD check and possible normalization. */
-- if (collPrevIterFCD(data)) {
-- collPrevIterNormalize(data);
-+ /*
-+ Some normalization happened.
-+ Next loop picks up a char from the normalization buffer.
-+ */
- }
-
-- if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
-- /* No normalization. Go ahead and process the char. */
-- break;
-- }
--
-- /*
-- Some normalization happened.
-- Next loop picks up a char from the normalization buffer.
-+ /* attempt to handle contractions, after removal of the backwards
-+ contraction
- */
-- }
--
-- /* attempt to handle contractions, after removal of the backwards
-- contraction
-- */
-- if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
-- result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
-- } else {
-- if (ch <= 0xFF) {
-- result = coll->latinOneMapping[ch];
-- }
-- else {
-- // Always use UCA for [3400..9FFF], [AC00..D7AF]
-- // **** [FA0E..FA2F] ?? ****
-- if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
-- (ch >= 0x3400 && ch <= 0xD7AF)) {
-- if (ch > 0x9FFF && ch < 0xAC00) {
-- // between the two target ranges; do normal lookup
-- // **** this range is YI, Modifier tone letters, ****
-- // **** Latin-D, Syloti Nagari, Phagas-pa. ****
-- // **** Latin-D might be tailored, so we need to ****
-- // **** do the normal lookup for these guys. ****
-- result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
-+ if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
-+ result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
-+ } else {
-+ if (ch <= 0xFF) {
-+ result = coll->latinOneMapping[ch];
-+ }
-+ else {
-+ // Always use UCA for [3400..9FFF], [AC00..D7AF]
-+ // **** [FA0E..FA2F] ?? ****
-+ if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
-+ (ch >= 0x3400 && ch <= 0xD7AF)) {
-+ if (ch > 0x9FFF && ch < 0xAC00) {
-+ // between the two target ranges; do normal lookup
-+ // **** this range is YI, Modifier tone letters, ****
-+ // **** Latin-D, Syloti Nagari, Phagas-pa. ****
-+ // **** Latin-D might be tailored, so we need to ****
-+ // **** do the normal lookup for these guys. ****
-+ result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
-+ } else {
-+ result = UCOL_NOT_FOUND;
-+ }
- } else {
-- result = UCOL_NOT_FOUND;
-+ result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
- }
-- } else {
-- result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
- }
-- }
-- if (result > UCOL_NOT_FOUND) {
-- result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
-- }
-- if (result == UCOL_NOT_FOUND) { // Not found in master list
-- if (!isAtStartPrevIterate(data) &&
-- ucol_contractionEndCP(ch, data->coll))
-- {
-- result = UCOL_CONTRACTION;
-- } else {
-- if(coll->UCA) {
-- result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
-+ if (result > UCOL_NOT_FOUND) {
-+ result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
-+ }
-+ if (result == UCOL_NOT_FOUND) { // Not found in master list
-+ if (!isAtStartPrevIterate(data) &&
-+ ucol_contractionEndCP(ch, data->coll))
-+ {
-+ result = UCOL_CONTRACTION;
-+ } else {
-+ if(coll->UCA) {
-+ result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
-+ }
- }
-- }
-
-- if (result > UCOL_NOT_FOUND) {
-- if(coll->UCA) {
-- result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
-+ if (result > UCOL_NOT_FOUND) {
-+ if(coll->UCA) {
-+ result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
-+ }
- }
- }
- }
-- }
-+ } while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );
-
- if(result == UCOL_NOT_FOUND) {
- result = getPrevImplicit(ch, data);
-@@ -3193,6 +3198,7 @@
- // Since Hanguls pass the FCD check, it is
- // guaranteed that we won't be in
- // the normalization buffer if something like this happens
-+
- // However, if we are using a uchar iterator and normalization
- // is ON, the Hangul that lead us here is going to be in that
- // normalization buffer. Here we want to restore the uchar
-@@ -3201,6 +3207,7 @@
- source->flags = source->origFlags; // restore the iterator
- source->pos = NULL;
- }
-+
- // Move Jamos into normalization buffer
- UChar *buffer = source->writableBuffer.getBuffer(4);
- int32_t bufferLength;
-@@ -3214,8 +3221,9 @@
- }
- source->writableBuffer.releaseBuffer(bufferLength);
-
-- source->fcdPosition = source->pos; // Indicate where to continue in main input string
-- // after exhausting the writableBuffer
-+ // Indicate where to continue in main input string after exhausting the writableBuffer
-+ source->fcdPosition = source->pos;
-+
- source->pos = source->writableBuffer.getTerminatedBuffer();
- source->origFlags = source->flags;
- source->flags |= UCOL_ITER_INNORMBUF;
-@@ -3966,13 +3974,10 @@
- // Since Hanguls pass the FCD check, it is
- // guaranteed that we won't be in
- // the normalization buffer if something like this happens
-+
- // Move Jamos into normalization buffer
-- /*
-- Move the Jamos into the
-- normalization buffer
-- */
- UChar *tempbuffer = source->writableBuffer.getBuffer(5);
-- int32_t tempbufferLength;
-+ int32_t tempbufferLength, jamoOffset;
- tempbuffer[0] = 0;
- tempbuffer[1] = (UChar)L;
- tempbuffer[2] = (UChar)V;
-@@ -3984,16 +3989,30 @@
- }
- source->writableBuffer.releaseBuffer(tempbufferLength);
-
-- /*
-- Indicate where to continue in main input string after exhausting
-- the writableBuffer
-- */
-+ // Indicate where to continue in main input string after exhausting the writableBuffer
- if (source->pos == source->string) {
-+ jamoOffset = 0;
- source->fcdPosition = NULL;
- } else {
-+ jamoOffset = source->pos - source->string;
- source->fcdPosition = source->pos-1;
- }
-+
-+ // Append offsets for the additional chars
-+ // (not the 0, and not the L whose offsets match the original Hangul)
-+ int32_t jamoRemaining = tempbufferLength - 2;
-+ jamoOffset++; // appended offsets should match end of original Hangul
-+ while (jamoRemaining-- > 0) {
-+ source->appendOffset(jamoOffset, *status);
-+ }
-
-+ source->offsetRepeatValue = jamoOffset;
-+
-+ source->offsetReturn = source->offsetStore - 1;
-+ if (source->offsetReturn == source->offsetBuffer) {
-+ source->offsetStore = source->offsetBuffer;
-+ }
-+
- source->pos = source->writableBuffer.getTerminatedBuffer() + tempbufferLength;
- source->origFlags = source->flags;
- source->flags |= UCOL_ITER_INNORMBUF;
« no previous file with comments | « icu52/patches/rtti.patch ('k') | icu52/patches/segmentation.patch » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698