Index: source/test/cintltst/citertst.c |
diff --git a/source/test/cintltst/citertst.c b/source/test/cintltst/citertst.c |
index e8e2cff0ff36fdef49989e77bd55b52aacfc80fd..c49487ab8d18cc0c613ad33058cfa9e35bae447a 100644 |
--- a/source/test/cintltst/citertst.c |
+++ b/source/test/cintltst/citertst.c |
@@ -1,6 +1,6 @@ |
/******************************************************************** |
* COPYRIGHT: |
- * Copyright (c) 1997-2013, International Business Machines Corporation and |
+ * Copyright (c) 1997-2014, International Business Machines Corporation and |
* others. All Rights Reserved. |
********************************************************************/ |
/******************************************************************************** |
@@ -35,7 +35,6 @@ |
#include "filestrm.h" |
#include "cstring.h" |
#include "ucol_imp.h" |
-#include "ucol_tok.h" |
#include "uparse.h" |
#include <stdio.h> |
@@ -54,11 +53,7 @@ void addCollIterTest(TestNode** root) |
addTest(root, &TestBug672, "tscoll/citertst/TestBug672"); |
addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize"); |
addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer"); |
- addTest(root, &TestCEs, "tscoll/citertst/TestCEs"); |
addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos"); |
- addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow"); |
- addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity"); |
- addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity"); |
addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements"); |
} |
@@ -763,15 +758,10 @@ static void TestSetText() |
/* Now set it to point to a null string with fake length*/ |
ucol_setText(iter2, NULL, 2, &status); |
- if (U_FAILURE(status)) |
+ if (status != U_ILLEGAL_ARGUMENT_ERROR) |
{ |
- log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status)); |
- } |
- else |
- { |
- if (ucol_next(iter2, &status) != UCOL_NULLORDER) { |
- log_err("iter2 with null text expected to return UCOL_NULLORDER\n"); |
- } |
+ log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n", |
+ myErrorName(status)); |
} |
ucol_closeElements(iter2); |
@@ -1011,353 +1001,6 @@ static void TestSmallBuffer() |
} |
/** |
-* Sniplets of code from genuca |
-*/ |
-static int32_t hex2num(char hex) { |
- if(hex>='0' && hex <='9') { |
- return hex-'0'; |
- } else if(hex>='a' && hex<='f') { |
- return hex-'a'+10; |
- } else if(hex>='A' && hex<='F') { |
- return hex-'A'+10; |
- } else { |
- return 0; |
- } |
-} |
- |
-/** |
-* Getting codepoints from a string |
-* @param str character string contain codepoints seperated by space and ended |
-* by a semicolon |
-* @param codepoints array for storage, assuming size > 5 |
-* @return position at the end of the codepoint section |
-*/ |
-static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) { |
- UErrorCode errorCode = U_ZERO_ERROR; |
- char *semi = uprv_strchr(str, ';'); |
- char *pipe = uprv_strchr(str, '|'); |
- char *s; |
- *codepoints = 0; |
- *contextCPs = 0; |
- if(semi == NULL) { |
- log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str); |
- return str; |
- } |
- if(pipe != NULL) { |
- int32_t contextLength; |
- *pipe = 0; |
- contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode); |
- *pipe = '|'; |
- if(U_FAILURE(errorCode)) { |
- log_err("error parsing precontext string from FractionalUCA.txt %s\n", str); |
- return str; |
- } |
- /* prepend the precontext string to the codepoints */ |
- u_memcpy(codepoints, contextCPs, contextLength); |
- codepoints += contextLength; |
- /* start of the code point string */ |
- s = pipe + 1; |
- } else { |
- s = str; |
- } |
- u_parseString(s, codepoints, 99, NULL, &errorCode); |
- if(U_FAILURE(errorCode)) { |
- log_err("error parsing code point string from FractionalUCA.txt %s\n", str); |
- return str; |
- } |
- return semi + 1; |
-} |
- |
-/** |
-* Sniplets of code from genuca |
-*/ |
-static int32_t |
-readElement(char **from, char *to, char separator, UErrorCode *status) |
-{ |
- if (U_SUCCESS(*status)) { |
- char buffer[1024]; |
- int32_t i = 0; |
- while (**from != separator) { |
- if (**from != ' ') { |
- *(buffer+i++) = **from; |
- } |
- (*from)++; |
- } |
- (*from)++; |
- *(buffer + i) = 0; |
- strcpy(to, buffer); |
- return i/2; |
- } |
- |
- return 0; |
-} |
- |
-/** |
-* Sniplets of code from genuca |
-*/ |
-static uint32_t |
-getSingleCEValue(char *primary, char *secondary, char *tertiary, |
- UErrorCode *status) |
-{ |
- if (U_SUCCESS(*status)) { |
- uint32_t value = 0; |
- char primsave = '\0'; |
- char secsave = '\0'; |
- char tersave = '\0'; |
- char *primend = primary+4; |
- char *secend = secondary+2; |
- char *terend = tertiary+2; |
- uint32_t primvalue; |
- uint32_t secvalue; |
- uint32_t tervalue; |
- |
- if (uprv_strlen(primary) > 4) { |
- primsave = *primend; |
- *primend = '\0'; |
- } |
- |
- if (uprv_strlen(secondary) > 2) { |
- secsave = *secend; |
- *secend = '\0'; |
- } |
- |
- if (uprv_strlen(tertiary) > 2) { |
- tersave = *terend; |
- *terend = '\0'; |
- } |
- |
- primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0; |
- secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0; |
- tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0; |
- if(primvalue <= 0xFF) { |
- primvalue <<= 8; |
- } |
- |
- value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK) |
- | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK) |
- | (tervalue & UCOL_TERTIARYORDERMASK); |
- |
- if(primsave!='\0') { |
- *primend = primsave; |
- } |
- if(secsave!='\0') { |
- *secend = secsave; |
- } |
- if(tersave!='\0') { |
- *terend = tersave; |
- } |
- return value; |
- } |
- return 0; |
-} |
- |
-/** |
-* Getting collation elements generated from a string |
-* @param str character string contain collation elements contained in [] and |
-* seperated by space |
-* @param ce array for storage, assuming size > 20 |
-* @param status error status |
-* @return position at the end of the codepoint section |
-*/ |
-static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) { |
- char *pStartCP = uprv_strchr(str, '['); |
- int count = 0; |
- char *pEndCP; |
- char primary[100]; |
- char secondary[100]; |
- char tertiary[100]; |
- |
- while (*pStartCP == '[') { |
- uint32_t primarycount = 0; |
- uint32_t secondarycount = 0; |
- uint32_t tertiarycount = 0; |
- uint32_t CEi = 1; |
- pEndCP = strchr(pStartCP, ']'); |
- if(pEndCP == NULL) { |
- break; |
- } |
- pStartCP ++; |
- |
- primarycount = readElement(&pStartCP, primary, ',', status); |
- secondarycount = readElement(&pStartCP, secondary, ',', status); |
- tertiarycount = readElement(&pStartCP, tertiary, ']', status); |
- |
- /* I want to get the CEs entered right here, including continuation */ |
- ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status); |
- if (U_FAILURE(*status)) { |
- break; |
- } |
- |
- while (2 * CEi < primarycount || CEi < secondarycount || |
- CEi < tertiarycount) { |
- uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */ |
- if (2 * CEi < primarycount) { |
- value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28); |
- value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24); |
- } |
- |
- if (2 * CEi + 1 < primarycount) { |
- value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20); |
- value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16); |
- } |
- |
- if (CEi < secondarycount) { |
- value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12); |
- value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8); |
- } |
- |
- if (CEi < tertiarycount) { |
- value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4); |
- value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF); |
- } |
- |
- CEi ++; |
- ces[count ++] = value; |
- } |
- |
- pStartCP = pEndCP + 1; |
- } |
- ces[count] = 0; |
- return pStartCP; |
-} |
- |
-/** |
-* Getting the FractionalUCA.txt file stream |
-*/ |
-static FileStream * getFractionalUCA(void) |
-{ |
- char newPath[256]; |
- char backupPath[256]; |
- FileStream *result = NULL; |
- |
- /* Look inside ICU_DATA first */ |
- uprv_strcpy(newPath, ctest_dataSrcDir()); |
- uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING ); |
- uprv_strcat(newPath, "FractionalUCA.txt"); |
- |
- /* As a fallback, try to guess where the source data was located |
- * at the time ICU was built, and look there. |
- */ |
-#if defined (U_TOPSRCDIR) |
- strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data"); |
-#else |
- { |
- UErrorCode errorCode = U_ZERO_ERROR; |
- strcpy(backupPath, loadTestData(&errorCode)); |
- strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data"); |
- } |
-#endif |
- strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt"); |
- |
- result = T_FileStream_open(newPath, "rb"); |
- |
- if (result == NULL) { |
- result = T_FileStream_open(backupPath, "rb"); |
- if (result == NULL) { |
- log_err("Failed to open either %s or %s\n", newPath, backupPath); |
- } |
- } |
- return result; |
-} |
- |
-/** |
-* Testing the CEs returned by the iterator |
-*/ |
-static void TestCEs() { |
- FileStream *file = NULL; |
- char line[2048]; |
- char *str; |
- UChar codepoints[10]; |
- uint32_t ces[20]; |
- UErrorCode status = U_ZERO_ERROR; |
- UCollator *coll = ucol_open("", &status); |
- uint32_t lineNo = 0; |
- UChar contextCPs[5]; |
- |
- if (U_FAILURE(status)) { |
- log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status)); |
- return; |
- } |
- |
- file = getFractionalUCA(); |
- |
- if (file == NULL) { |
- log_err("*** unable to open input FractionalUCA.txt file ***\n"); |
- return; |
- } |
- |
- |
- while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { |
- int count = 0; |
- UCollationElements *iter; |
- int32_t preContextCeLen=0; |
- lineNo++; |
- /* skip this line if it is empty or a comment or is a return value |
- or start of some variable section */ |
- if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || |
- line[0] == 0x000D || line[0] == '[') { |
- continue; |
- } |
- |
- str = getCodePoints(line, codepoints, contextCPs); |
- |
- /* these are 'fake' codepoints in the fractional UCA, and are used just |
- * for positioning of indirect values. They should not go through this |
- * test. |
- */ |
- if(*codepoints == 0xFDD0) { |
- continue; |
- } |
- if (*contextCPs != 0) { |
- iter = ucol_openElements(coll, contextCPs, -1, &status); |
- if (U_FAILURE(status)) { |
- log_err("Error in opening collation elements\n"); |
- break; |
- } |
- while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) { |
- preContextCeLen++; |
- } |
- ucol_closeElements(iter); |
- } |
- |
- getCEs(str, ces+preContextCeLen, &status); |
- if (U_FAILURE(status)) { |
- log_err("Error in parsing collation elements in FractionalUCA.txt\n"); |
- break; |
- } |
- iter = ucol_openElements(coll, codepoints, -1, &status); |
- if (U_FAILURE(status)) { |
- log_err("Error in opening collation elements\n"); |
- break; |
- } |
- for (;;) { |
- uint32_t ce = (uint32_t)ucol_next(iter, &status); |
- if (ce == 0xFFFFFFFF) { |
- ce = 0; |
- } |
- /* we now unconditionally reorder Thai/Lao prevowels, so this |
- * test would fail if we don't skip here. |
- */ |
- if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) { |
- continue; |
- } |
- if (ce != ces[count] || U_FAILURE(status)) { |
- log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n"); |
- break; |
- } |
- if (ces[count] == 0) { |
- break; |
- } |
- count ++; |
- } |
- ucol_closeElements(iter); |
- } |
- |
- T_FileStream_close(file); |
- ucol_close(coll); |
-} |
- |
-/** |
* Testing the discontigous contractions |
*/ |
static void TestDiscontiguos() { |
@@ -1467,603 +1110,16 @@ static void TestDiscontiguos() { |
ucol_close(coll); |
} |
-static void TestCEBufferOverflow() |
-{ |
- UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1]; |
- UErrorCode status = U_ZERO_ERROR; |
- UChar rule[10]; |
- UCollator *coll; |
- UCollationElements *iter; |
- |
- u_uastrcpy(rule, "&z < AB"); |
- coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status); |
- if (U_FAILURE(status)) { |
- log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status)); |
- return; |
- } |
- |
- /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic |
- test. this will cause an overflow in getPrev */ |
- str[0] = 0x0041; /* 'A' */ |
- /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/ |
- uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE); |
- str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */ |
- iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1, |
- &status); |
- if (ucol_previous(iter, &status) == UCOL_NULLORDER || |
- status == U_BUFFER_OVERFLOW_ERROR) { |
- log_err("CE buffer should not overflow with long string of trail surrogates\n"); |
- } |
- ucol_closeElements(iter); |
- ucol_close(coll); |
-} |
- |
-/** |
-* Checking collation element validity. |
-*/ |
-#define MAX_CODEPOINTS_TO_SHOW 10 |
-static void showCodepoints(const UChar *codepoints, int length, char * codepointText) { |
- int i, lengthToUse = length; |
- if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) { |
- lengthToUse = MAX_CODEPOINTS_TO_SHOW; |
- } |
- for (i = 0; i < lengthToUse; ++i) { |
- int bytesWritten = sprintf(codepointText, " %04X", *codepoints++); |
- if (bytesWritten <= 0) { |
- break; |
- } |
- codepointText += bytesWritten; |
- } |
- if (i < length) { |
- sprintf(codepointText, " ..."); |
- } |
-} |
- |
-static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints, |
- int length) |
-{ |
- UErrorCode status = U_ZERO_ERROR; |
- UCollationElements *iter = ucol_openElements(coll, codepoints, length, |
- &status); |
- UBool result = FALSE; |
- UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE; |
- const char * collLocale; |
- |
- if (U_FAILURE(status)) { |
- log_err("Error creating iterator for testing validity\n"); |
- return FALSE; |
- } |
- collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status); |
- if (U_FAILURE(status) || collLocale==NULL) { |
- status = U_ZERO_ERROR; |
- collLocale = "?"; |
- } |
- |
- for (;;) { |
- uint32_t ce = ucol_next(iter, &status); |
- uint32_t primary, p1, p2, secondary, tertiary; |
- if (ce == UCOL_NULLORDER) { |
- result = TRUE; |
- break; |
- } |
- if (ce == 0) { |
- continue; |
- } |
- if (ce == 0x02000202) { |
- /* special CE for merge-sort character */ |
- if (*codepoints == 0xFFFE /* && length == 1 */) { |
- /* |
- * Note: We should check for length==1 but the token parser appears |
- * to give us trailing NUL characters. |
- * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet() |
- * rather than the internal collation rule parser |
- */ |
- continue; |
- } else { |
- log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n", |
- (int)*codepoints, (int)length); |
- break; |
- } |
- } |
- primary = UCOL_PRIMARYORDER(ce); |
- p1 = primary >> 8; |
- p2 = primary & 0xFF; |
- secondary = UCOL_SECONDARYORDER(ce); |
- tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION; |
- |
- if (!isContinuation(ce)) { |
- if ((ce & UCOL_REMOVE_CONTINUATION) == 0) { |
- log_err("Empty CE %08lX except for case bits\n", (long)ce); |
- break; |
- } |
- if (p1 == 0) { |
- if (p2 != 0) { |
- log_err("Primary 00 xx in %08lX\n", (long)ce); |
- break; |
- } |
- primaryDone = TRUE; |
- } else { |
- if (p1 <= 2 || p1 >= 0xF0) { |
- /* Primary first bytes F0..FF are specials. */ |
- log_err("Primary first byte of %08lX out of range\n", (long)ce); |
- break; |
- } |
- if (p2 == 0) { |
- primaryDone = TRUE; |
- } else { |
- if (p2 <= 3 || p2 >= 0xFF) { |
- /* Primary second bytes 03 and FF are sort key compression terminators. */ |
- log_err("Primary second byte of %08lX out of range\n", (long)ce); |
- break; |
- } |
- primaryDone = FALSE; |
- } |
- } |
- if (secondary == 0) { |
- if (primary != 0) { |
- log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce); |
- break; |
- } |
- secondaryDone = TRUE; |
- } else { |
- if (secondary <= 2 || |
- (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80)) |
- ) { |
- /* Secondary first bytes common+1..+0x80 are used for sort key compression. */ |
- log_err("Secondary byte of %08lX out of range\n", (long)ce); |
- break; |
- } |
- secondaryDone = FALSE; |
- } |
- if (tertiary == 0) { |
- /* We know that ce != 0. */ |
- log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce); |
- break; |
- } |
- if (tertiary <= 2) { |
- log_err("Tertiary byte of %08lX out of range\n", (long)ce); |
- break; |
- } |
- tertiaryDone = FALSE; |
- } else { |
- if ((ce & UCOL_REMOVE_CONTINUATION) == 0) { |
- log_err("Empty continuation %08lX\n", (long)ce); |
- break; |
- } |
- if (primaryDone && primary != 0) { |
- log_err("Primary was done but continues in %08lX\n", (long)ce); |
- break; |
- } |
- if (p1 == 0) { |
- if (p2 != 0) { |
- log_err("Primary 00 xx in %08lX\n", (long)ce); |
- break; |
- } |
- primaryDone = TRUE; |
- } else { |
- if (p1 <= 2) { |
- log_err("Primary first byte of %08lX out of range\n", (long)ce); |
- break; |
- } |
- if (p2 == 0) { |
- primaryDone = TRUE; |
- } else { |
- if (p2 <= 3) { |
- log_err("Primary second byte of %08lX out of range\n", (long)ce); |
- break; |
- } |
- } |
- } |
- if (secondaryDone && secondary != 0) { |
- log_err("Secondary was done but continues in %08lX\n", (long)ce); |
- break; |
- } |
- if (secondary == 0) { |
- secondaryDone = TRUE; |
- } else { |
- if (secondary <= 2) { |
- log_err("Secondary byte of %08lX out of range\n", (long)ce); |
- break; |
- } |
- } |
- if (tertiaryDone && tertiary != 0) { |
- log_err("Tertiary was done but continues in %08lX\n", (long)ce); |
- break; |
- } |
- if (tertiary == 0) { |
- tertiaryDone = TRUE; |
- } else if (tertiary <= 2) { |
- log_err("Tertiary byte of %08lX out of range\n", (long)ce); |
- break; |
- } |
- } |
- } |
- if (!result) { |
- char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5]; |
- showCodepoints(codepoints, length, codepointText); |
- log_err("Locale: %s Code point string: %s\n", collLocale, codepointText); |
- } |
- ucol_closeElements(iter); |
- return result; |
-} |
- |
-static const UChar IMPORT[] = { 0x5B, 0x69, 0x6D, 0x70, 0x6F, 0x72, 0x74, 0 }; /* "[import" */ |
- |
-static void TestCEValidity() |
-{ |
- /* testing UCA collation elements */ |
- UErrorCode status = U_ZERO_ERROR; |
- /* en_US has no tailorings */ |
- UCollator *coll = ucol_open("root", &status); |
- /* tailored locales */ |
- char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"}; |
- const char *loc; |
- FileStream *file = NULL; |
- char line[2048]; |
- UChar codepoints[11]; |
- int count = 0; |
- int maxCount = 0; |
- UChar contextCPs[3]; |
- UChar32 c; |
- UParseError parseError; |
- if (U_FAILURE(status)) { |
- log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status)); |
- return; |
- } |
- log_verbose("Testing UCA elements\n"); |
- file = getFractionalUCA(); |
- if (file == NULL) { |
- log_err("Fractional UCA data can not be opened\n"); |
- return; |
- } |
- |
- while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { |
- if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || |
- line[0] == 0x000D || line[0] == '[') { |
- continue; |
- } |
- |
- getCodePoints(line, codepoints, contextCPs); |
- checkCEValidity(coll, codepoints, u_strlen(codepoints)); |
- } |
- |
- log_verbose("Testing UCA elements for the whole range of unicode characters\n"); |
- for (c = 0; c <= 0xffff; ++c) { |
- if (u_isdefined(c)) { |
- codepoints[0] = (UChar)c; |
- checkCEValidity(coll, codepoints, 1); |
- } |
- } |
- for (; c <= 0x10ffff; ++c) { |
- if (u_isdefined(c)) { |
- int32_t i = 0; |
- U16_APPEND_UNSAFE(codepoints, i, c); |
- checkCEValidity(coll, codepoints, i); |
- } |
- } |
- |
- ucol_close(coll); |
- |
- /* testing tailored collation elements */ |
- log_verbose("Testing tailored elements\n"); |
- if(getTestOption(QUICK_OPTION)) { |
- maxCount = sizeof(locale)/sizeof(locale[0]); |
- } else { |
- maxCount = uloc_countAvailable(); |
- } |
- while (count < maxCount) { |
- const UChar *rules = NULL, |
- *current = NULL; |
- UChar *rulesCopy = NULL; |
- int32_t ruleLen = 0; |
- |
- uint32_t chOffset = 0; |
- uint32_t chLen = 0; |
- uint32_t exOffset = 0; |
- uint32_t exLen = 0; |
- uint32_t prefixOffset = 0; |
- uint32_t prefixLen = 0; |
- UBool startOfRules = TRUE; |
- UColOptionSet opts; |
- |
- UColTokenParser src; |
- uint32_t strength = 0; |
- uint16_t specs = 0; |
- |
- (void)specs; /* Suppress set but not used warnings. */ |
- (void)strength; |
- (void)prefixLen; |
- (void)prefixOffset; |
- (void)exLen; |
- (void)exOffset; |
- |
- if(getTestOption(QUICK_OPTION)) { |
- loc = locale[count]; |
- } else { |
- loc = uloc_getAvailable(count); |
- if(!hasCollationElements(loc)) { |
- count++; |
- continue; |
- } |
- } |
- status = U_ZERO_ERROR; // clear status from previous loop iteration |
- |
- uprv_memset(&src, 0, sizeof(UColTokenParser)); |
- |
- log_verbose("Testing CEs for %s\n", loc); |
- |
- coll = ucol_open(loc, &status); |
- if (U_FAILURE(status)) { |
- log_err("%s collator creation failed with status %s\n", loc, u_errorName(status)); |
- return; |
- } |
- |
- src.opts = &opts; |
- rules = ucol_getRules(coll, &ruleLen); |
- |
- /* |
- * We have not set up the UColTokenParser with a callback function |
- * to fetch [import] sub-rules, |
- * so skip testing tailorings that import others. |
- * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet() |
- * rather than the internal collation rule parser |
- */ |
- if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) { |
- rulesCopy = (UChar *)uprv_malloc((ruleLen + |
- UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar)); |
- uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar)); |
- src.current = src.source = rulesCopy; |
- src.end = rulesCopy + ruleLen; |
- src.extraCurrent = src.end; |
- src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE; |
- |
- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to |
- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ |
- while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL && U_SUCCESS(status)) { |
- strength = src.parsedToken.strength; |
- chOffset = src.parsedToken.charsOffset; |
- chLen = src.parsedToken.charsLen; |
- exOffset = src.parsedToken.extensionOffset; |
- exLen = src.parsedToken.extensionLen; |
- prefixOffset = src.parsedToken.prefixOffset; |
- prefixLen = src.parsedToken.prefixLen; |
- specs = src.parsedToken.flags; |
- |
- startOfRules = FALSE; |
- uprv_memcpy(codepoints, src.source + chOffset, |
- chLen * sizeof(UChar)); |
- codepoints[chLen] = 0; |
- checkCEValidity(coll, codepoints, chLen); |
- } |
- if (U_FAILURE(status)) { |
- log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", loc, u_errorName(status)); |
- } |
- uprv_free(src.source); |
- uprv_free(src.reorderCodes); |
- } |
- |
- ucol_close(coll); |
- count ++; |
- } |
- T_FileStream_close(file); |
-} |
- |
-static void printSortKeyError(const UChar *codepoints, int length, |
- uint8_t *sortkey, int sklen) |
-{ |
- int count = 0; |
- log_err("Sortkey not valid for "); |
- while (length > 0) { |
- log_err("0x%04x ", *codepoints); |
- length --; |
- codepoints ++; |
- } |
- log_err("\nSortkey : "); |
- while (count < sklen) { |
- log_err("0x%02x ", sortkey[count]); |
- count ++; |
- } |
- log_err("\n"); |
-} |
- |
-/** |
-* Checking sort key validity for all levels |
-*/ |
-static UBool checkSortKeyValidity(UCollator *coll, |
- const UChar *codepoints, |
- int length) |
-{ |
- UErrorCode status = U_ZERO_ERROR; |
- UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY, |
- UCOL_TERTIARY, UCOL_QUATERNARY, |
- UCOL_IDENTICAL}; |
- int strengthlen = 5; |
- int strengthIndex = 0; |
- int caselevel = 0; |
- |
- while (caselevel < 1) { |
- if (caselevel == 0) { |
- ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status); |
- } |
- else { |
- ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status); |
- } |
- |
- while (strengthIndex < strengthlen) { |
- int count01 = 0; |
- uint32_t count = 0; |
- uint8_t sortkey[128]; |
- uint32_t sklen; |
- |
- ucol_setStrength(coll, strength[strengthIndex]); |
- sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128); |
- while (sortkey[count] != 0) { |
- if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) { |
- printSortKeyError(codepoints, length, sortkey, sklen); |
- return FALSE; |
- } |
- if (sortkey[count] == 1) { |
- count01 ++; |
- } |
- count ++; |
- } |
- |
- if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) { |
- printSortKeyError(codepoints, length, sortkey, sklen); |
- return FALSE; |
- } |
- strengthIndex ++; |
- } |
- caselevel ++; |
- } |
- return TRUE; |
-} |
- |
-static void TestSortKeyValidity(void) |
-{ |
- /* testing UCA collation elements */ |
- UErrorCode status = U_ZERO_ERROR; |
- /* en_US has no tailorings */ |
- UCollator *coll = ucol_open("en_US", &status); |
- /* tailored locales */ |
- char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"}; |
- FileStream *file = NULL; |
- char line[2048]; |
- UChar codepoints[10]; |
- int count = 0; |
- UChar contextCPs[5]; |
- UParseError parseError; |
- if (U_FAILURE(status)) { |
- log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status)); |
- return; |
- } |
- log_verbose("Testing UCA elements\n"); |
- file = getFractionalUCA(); |
- if (file == NULL) { |
- log_err("Fractional UCA data can not be opened\n"); |
- return; |
- } |
- |
- while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { |
- if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || |
- line[0] == 0x000D || line[0] == '[') { |
- continue; |
- } |
- |
- getCodePoints(line, codepoints, contextCPs); |
- if(codepoints[0] == 0xFFFE) { |
- /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */ |
- continue; |
- } |
- checkSortKeyValidity(coll, codepoints, u_strlen(codepoints)); |
- } |
- |
- log_verbose("Testing UCA elements for the whole range of unicode characters\n"); |
- codepoints[0] = 0; |
- |
- while (codepoints[0] < 0xFFFF) { |
- if (u_isdefined((UChar32)codepoints[0])) { |
- checkSortKeyValidity(coll, codepoints, 1); |
- } |
- codepoints[0] ++; |
- } |
- |
- ucol_close(coll); |
- |
- /* testing tailored collation elements */ |
- log_verbose("Testing tailored elements\n"); |
- while (count < 5) { |
- const UChar *rules = NULL, |
- *current = NULL; |
- UChar *rulesCopy = NULL; |
- int32_t ruleLen = 0; |
- |
- uint32_t chOffset = 0; |
- uint32_t chLen = 0; |
- uint32_t exOffset = 0; |
- uint32_t exLen = 0; |
- uint32_t prefixOffset = 0; |
- uint32_t prefixLen = 0; |
- UBool startOfRules = TRUE; |
- UColOptionSet opts; |
- |
- UColTokenParser src; |
- uint32_t strength = 0; |
- uint16_t specs = 0; |
- status = U_ZERO_ERROR; // clear status from previous loop iteration |
- |
- (void)specs; |
- (void)strength; |
- (void)prefixLen; |
- (void)prefixOffset; |
- (void)exLen; |
- (void)exOffset; |
- |
- uprv_memset(&src, 0, sizeof(UColTokenParser)); |
- |
- coll = ucol_open(locale[count], &status); |
- if (U_FAILURE(status)) { |
- log_err("%s collator creation failed with status %s\n", locale[count], u_errorName(status)); |
- return; |
- } |
- |
- src.opts = &opts; |
- rules = ucol_getRules(coll, &ruleLen); |
- |
- /* |
- * We have not set up the UColTokenParser with a callback function |
- * to fetch [import] sub-rules, |
- * so skip testing tailorings that import others. |
- * TODO: Ticket #8047: Change TestSortKeyValidity to use ucol_getTailoredSet() |
- * rather than the internal collation rule parser |
- */ |
- if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) { |
- rulesCopy = (UChar *)uprv_malloc((ruleLen + |
- UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar)); |
- uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar)); |
- src.current = src.source = rulesCopy; |
- src.end = rulesCopy + ruleLen; |
- src.extraCurrent = src.end; |
- src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE; |
- |
- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to |
- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ |
- while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL && U_SUCCESS(status)) { |
- strength = src.parsedToken.strength; |
- chOffset = src.parsedToken.charsOffset; |
- chLen = src.parsedToken.charsLen; |
- exOffset = src.parsedToken.extensionOffset; |
- exLen = src.parsedToken.extensionLen; |
- prefixOffset = src.parsedToken.prefixOffset; |
- prefixLen = src.parsedToken.prefixLen; |
- specs = src.parsedToken.flags; |
- |
- startOfRules = FALSE; |
- uprv_memcpy(codepoints, src.source + chOffset, |
- chLen * sizeof(UChar)); |
- codepoints[chLen] = 0; |
- if(codepoints[0] == 0xFFFE) { |
- /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */ |
- continue; |
- } |
- checkSortKeyValidity(coll, codepoints, chLen); |
- } |
- if (U_FAILURE(status)) { |
- log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", locale[count], u_errorName(status)); |
- } |
- uprv_free(src.source); |
- uprv_free(src.reorderCodes); |
- } |
- |
- ucol_close(coll); |
- count ++; |
- } |
- T_FileStream_close(file); |
-} |
- |
/** |
* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with |
* normalization on AND jamo tailoring, among other things. |
+* |
+* Note: This test is sensitive to changes of the root collator, |
+* for example whether the ae-ligature maps to three CEs (as in the DUCET) |
+* or to two CEs (as in the CLDR 24 FractionalUCA.txt). |
+* It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding. |
+* For example, the DUCET's artificial secondary CE in the ae-ligature |
+* may map to two 32-bit iterator CEs (as it did until ICU 52). |
*/ |
static const UChar tsceText[] = { /* Nothing in here should be ignorable */ |
0x0020, 0xAC00, /* simple LV Hangul */ |
@@ -2089,7 +1145,7 @@ static const int32_t rootStandardOffsets[] = { |
12, 13,14,15, |
16, 17,18,19, |
20, 21,22,23, |
- 24, 25,26,26,26, |
+ 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */ |
26, 27,28,28, |
28, |
29 |
@@ -2105,7 +1161,7 @@ static const int32_t rootSearchOffsets[] = { |
12, 13,14,15, |
16, 17,18,19,20, |
20, 21,22,22,23,23,23,24, |
- 24, 25,26,26,26, |
+ 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */ |
26, 27,28,28, |
28, |
29 |
@@ -2142,6 +1198,7 @@ static void TestSearchCollatorElements(void) |
do { |
offset = ucol_getOffset(uce); |
element = ucol_next(uce, &status); |
+ log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr->locale, offset, element); |
if ( element == 0 ) { |
log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale ); |
} |