| Index: source/test/cintltst/citertst.c
|
| diff --git a/source/test/cintltst/citertst.c b/source/test/cintltst/citertst.c
|
| index e8e2cff0ff36fdef49989e77bd55b52aacfc80fd..c49487ab8d18cc0c613ad33058cfa9e35bae447a 100644
|
| --- a/source/test/cintltst/citertst.c
|
| +++ b/source/test/cintltst/citertst.c
|
| @@ -1,6 +1,6 @@
|
| /********************************************************************
|
| * COPYRIGHT:
|
| - * Copyright (c) 1997-2013, International Business Machines Corporation and
|
| + * Copyright (c) 1997-2014, International Business Machines Corporation and
|
| * others. All Rights Reserved.
|
| ********************************************************************/
|
| /********************************************************************************
|
| @@ -35,7 +35,6 @@
|
| #include "filestrm.h"
|
| #include "cstring.h"
|
| #include "ucol_imp.h"
|
| -#include "ucol_tok.h"
|
| #include "uparse.h"
|
| #include <stdio.h>
|
|
|
| @@ -54,11 +53,7 @@ void addCollIterTest(TestNode** root)
|
| addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
|
| addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
|
| addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
|
| - addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
|
| addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
|
| - addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
|
| - addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
|
| - addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
|
| addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
|
| }
|
|
|
| @@ -763,15 +758,10 @@ static void TestSetText()
|
|
|
| /* Now set it to point to a null string with fake length*/
|
| ucol_setText(iter2, NULL, 2, &status);
|
| - if (U_FAILURE(status))
|
| + if (status != U_ILLEGAL_ARGUMENT_ERROR)
|
| {
|
| - log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
|
| - }
|
| - else
|
| - {
|
| - if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
|
| - log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
|
| - }
|
| + log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
|
| + myErrorName(status));
|
| }
|
|
|
| ucol_closeElements(iter2);
|
| @@ -1011,353 +1001,6 @@ static void TestSmallBuffer()
|
| }
|
|
|
| /**
|
| -* Sniplets of code from genuca
|
| -*/
|
| -static int32_t hex2num(char hex) {
|
| - if(hex>='0' && hex <='9') {
|
| - return hex-'0';
|
| - } else if(hex>='a' && hex<='f') {
|
| - return hex-'a'+10;
|
| - } else if(hex>='A' && hex<='F') {
|
| - return hex-'A'+10;
|
| - } else {
|
| - return 0;
|
| - }
|
| -}
|
| -
|
| -/**
|
| -* Getting codepoints from a string
|
| -* @param str character string contain codepoints seperated by space and ended
|
| -* by a semicolon
|
| -* @param codepoints array for storage, assuming size > 5
|
| -* @return position at the end of the codepoint section
|
| -*/
|
| -static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
|
| - UErrorCode errorCode = U_ZERO_ERROR;
|
| - char *semi = uprv_strchr(str, ';');
|
| - char *pipe = uprv_strchr(str, '|');
|
| - char *s;
|
| - *codepoints = 0;
|
| - *contextCPs = 0;
|
| - if(semi == NULL) {
|
| - log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
|
| - return str;
|
| - }
|
| - if(pipe != NULL) {
|
| - int32_t contextLength;
|
| - *pipe = 0;
|
| - contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
|
| - *pipe = '|';
|
| - if(U_FAILURE(errorCode)) {
|
| - log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
|
| - return str;
|
| - }
|
| - /* prepend the precontext string to the codepoints */
|
| - u_memcpy(codepoints, contextCPs, contextLength);
|
| - codepoints += contextLength;
|
| - /* start of the code point string */
|
| - s = pipe + 1;
|
| - } else {
|
| - s = str;
|
| - }
|
| - u_parseString(s, codepoints, 99, NULL, &errorCode);
|
| - if(U_FAILURE(errorCode)) {
|
| - log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
|
| - return str;
|
| - }
|
| - return semi + 1;
|
| -}
|
| -
|
| -/**
|
| -* Sniplets of code from genuca
|
| -*/
|
| -static int32_t
|
| -readElement(char **from, char *to, char separator, UErrorCode *status)
|
| -{
|
| - if (U_SUCCESS(*status)) {
|
| - char buffer[1024];
|
| - int32_t i = 0;
|
| - while (**from != separator) {
|
| - if (**from != ' ') {
|
| - *(buffer+i++) = **from;
|
| - }
|
| - (*from)++;
|
| - }
|
| - (*from)++;
|
| - *(buffer + i) = 0;
|
| - strcpy(to, buffer);
|
| - return i/2;
|
| - }
|
| -
|
| - return 0;
|
| -}
|
| -
|
| -/**
|
| -* Sniplets of code from genuca
|
| -*/
|
| -static uint32_t
|
| -getSingleCEValue(char *primary, char *secondary, char *tertiary,
|
| - UErrorCode *status)
|
| -{
|
| - if (U_SUCCESS(*status)) {
|
| - uint32_t value = 0;
|
| - char primsave = '\0';
|
| - char secsave = '\0';
|
| - char tersave = '\0';
|
| - char *primend = primary+4;
|
| - char *secend = secondary+2;
|
| - char *terend = tertiary+2;
|
| - uint32_t primvalue;
|
| - uint32_t secvalue;
|
| - uint32_t tervalue;
|
| -
|
| - if (uprv_strlen(primary) > 4) {
|
| - primsave = *primend;
|
| - *primend = '\0';
|
| - }
|
| -
|
| - if (uprv_strlen(secondary) > 2) {
|
| - secsave = *secend;
|
| - *secend = '\0';
|
| - }
|
| -
|
| - if (uprv_strlen(tertiary) > 2) {
|
| - tersave = *terend;
|
| - *terend = '\0';
|
| - }
|
| -
|
| - primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
|
| - secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
|
| - tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
|
| - if(primvalue <= 0xFF) {
|
| - primvalue <<= 8;
|
| - }
|
| -
|
| - value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
|
| - | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
|
| - | (tervalue & UCOL_TERTIARYORDERMASK);
|
| -
|
| - if(primsave!='\0') {
|
| - *primend = primsave;
|
| - }
|
| - if(secsave!='\0') {
|
| - *secend = secsave;
|
| - }
|
| - if(tersave!='\0') {
|
| - *terend = tersave;
|
| - }
|
| - return value;
|
| - }
|
| - return 0;
|
| -}
|
| -
|
| -/**
|
| -* Getting collation elements generated from a string
|
| -* @param str character string contain collation elements contained in [] and
|
| -* seperated by space
|
| -* @param ce array for storage, assuming size > 20
|
| -* @param status error status
|
| -* @return position at the end of the codepoint section
|
| -*/
|
| -static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
|
| - char *pStartCP = uprv_strchr(str, '[');
|
| - int count = 0;
|
| - char *pEndCP;
|
| - char primary[100];
|
| - char secondary[100];
|
| - char tertiary[100];
|
| -
|
| - while (*pStartCP == '[') {
|
| - uint32_t primarycount = 0;
|
| - uint32_t secondarycount = 0;
|
| - uint32_t tertiarycount = 0;
|
| - uint32_t CEi = 1;
|
| - pEndCP = strchr(pStartCP, ']');
|
| - if(pEndCP == NULL) {
|
| - break;
|
| - }
|
| - pStartCP ++;
|
| -
|
| - primarycount = readElement(&pStartCP, primary, ',', status);
|
| - secondarycount = readElement(&pStartCP, secondary, ',', status);
|
| - tertiarycount = readElement(&pStartCP, tertiary, ']', status);
|
| -
|
| - /* I want to get the CEs entered right here, including continuation */
|
| - ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
|
| - if (U_FAILURE(*status)) {
|
| - break;
|
| - }
|
| -
|
| - while (2 * CEi < primarycount || CEi < secondarycount ||
|
| - CEi < tertiarycount) {
|
| - uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
|
| - if (2 * CEi < primarycount) {
|
| - value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
|
| - value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
|
| - }
|
| -
|
| - if (2 * CEi + 1 < primarycount) {
|
| - value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
|
| - value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
|
| - }
|
| -
|
| - if (CEi < secondarycount) {
|
| - value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
|
| - value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
|
| - }
|
| -
|
| - if (CEi < tertiarycount) {
|
| - value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
|
| - value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
|
| - }
|
| -
|
| - CEi ++;
|
| - ces[count ++] = value;
|
| - }
|
| -
|
| - pStartCP = pEndCP + 1;
|
| - }
|
| - ces[count] = 0;
|
| - return pStartCP;
|
| -}
|
| -
|
| -/**
|
| -* Getting the FractionalUCA.txt file stream
|
| -*/
|
| -static FileStream * getFractionalUCA(void)
|
| -{
|
| - char newPath[256];
|
| - char backupPath[256];
|
| - FileStream *result = NULL;
|
| -
|
| - /* Look inside ICU_DATA first */
|
| - uprv_strcpy(newPath, ctest_dataSrcDir());
|
| - uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
|
| - uprv_strcat(newPath, "FractionalUCA.txt");
|
| -
|
| - /* As a fallback, try to guess where the source data was located
|
| - * at the time ICU was built, and look there.
|
| - */
|
| -#if defined (U_TOPSRCDIR)
|
| - strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");
|
| -#else
|
| - {
|
| - UErrorCode errorCode = U_ZERO_ERROR;
|
| - strcpy(backupPath, loadTestData(&errorCode));
|
| - strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
|
| - }
|
| -#endif
|
| - strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
|
| -
|
| - result = T_FileStream_open(newPath, "rb");
|
| -
|
| - if (result == NULL) {
|
| - result = T_FileStream_open(backupPath, "rb");
|
| - if (result == NULL) {
|
| - log_err("Failed to open either %s or %s\n", newPath, backupPath);
|
| - }
|
| - }
|
| - return result;
|
| -}
|
| -
|
| -/**
|
| -* Testing the CEs returned by the iterator
|
| -*/
|
| -static void TestCEs() {
|
| - FileStream *file = NULL;
|
| - char line[2048];
|
| - char *str;
|
| - UChar codepoints[10];
|
| - uint32_t ces[20];
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - UCollator *coll = ucol_open("", &status);
|
| - uint32_t lineNo = 0;
|
| - UChar contextCPs[5];
|
| -
|
| - if (U_FAILURE(status)) {
|
| - log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
|
| - return;
|
| - }
|
| -
|
| - file = getFractionalUCA();
|
| -
|
| - if (file == NULL) {
|
| - log_err("*** unable to open input FractionalUCA.txt file ***\n");
|
| - return;
|
| - }
|
| -
|
| -
|
| - while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
|
| - int count = 0;
|
| - UCollationElements *iter;
|
| - int32_t preContextCeLen=0;
|
| - lineNo++;
|
| - /* skip this line if it is empty or a comment or is a return value
|
| - or start of some variable section */
|
| - if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
|
| - line[0] == 0x000D || line[0] == '[') {
|
| - continue;
|
| - }
|
| -
|
| - str = getCodePoints(line, codepoints, contextCPs);
|
| -
|
| - /* these are 'fake' codepoints in the fractional UCA, and are used just
|
| - * for positioning of indirect values. They should not go through this
|
| - * test.
|
| - */
|
| - if(*codepoints == 0xFDD0) {
|
| - continue;
|
| - }
|
| - if (*contextCPs != 0) {
|
| - iter = ucol_openElements(coll, contextCPs, -1, &status);
|
| - if (U_FAILURE(status)) {
|
| - log_err("Error in opening collation elements\n");
|
| - break;
|
| - }
|
| - while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
|
| - preContextCeLen++;
|
| - }
|
| - ucol_closeElements(iter);
|
| - }
|
| -
|
| - getCEs(str, ces+preContextCeLen, &status);
|
| - if (U_FAILURE(status)) {
|
| - log_err("Error in parsing collation elements in FractionalUCA.txt\n");
|
| - break;
|
| - }
|
| - iter = ucol_openElements(coll, codepoints, -1, &status);
|
| - if (U_FAILURE(status)) {
|
| - log_err("Error in opening collation elements\n");
|
| - break;
|
| - }
|
| - for (;;) {
|
| - uint32_t ce = (uint32_t)ucol_next(iter, &status);
|
| - if (ce == 0xFFFFFFFF) {
|
| - ce = 0;
|
| - }
|
| - /* we now unconditionally reorder Thai/Lao prevowels, so this
|
| - * test would fail if we don't skip here.
|
| - */
|
| - if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
|
| - continue;
|
| - }
|
| - if (ce != ces[count] || U_FAILURE(status)) {
|
| - log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
|
| - break;
|
| - }
|
| - if (ces[count] == 0) {
|
| - break;
|
| - }
|
| - count ++;
|
| - }
|
| - ucol_closeElements(iter);
|
| - }
|
| -
|
| - T_FileStream_close(file);
|
| - ucol_close(coll);
|
| -}
|
| -
|
| -/**
|
| * Testing the discontigous contractions
|
| */
|
| static void TestDiscontiguos() {
|
| @@ -1467,603 +1110,16 @@ static void TestDiscontiguos() {
|
| ucol_close(coll);
|
| }
|
|
|
| -static void TestCEBufferOverflow()
|
| -{
|
| - UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - UChar rule[10];
|
| - UCollator *coll;
|
| - UCollationElements *iter;
|
| -
|
| - u_uastrcpy(rule, "&z < AB");
|
| - coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
|
| - if (U_FAILURE(status)) {
|
| - log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
|
| - return;
|
| - }
|
| -
|
| - /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
|
| - test. this will cause an overflow in getPrev */
|
| - str[0] = 0x0041; /* 'A' */
|
| - /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
|
| - uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
|
| - str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */
|
| - iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
|
| - &status);
|
| - if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
|
| - status == U_BUFFER_OVERFLOW_ERROR) {
|
| - log_err("CE buffer should not overflow with long string of trail surrogates\n");
|
| - }
|
| - ucol_closeElements(iter);
|
| - ucol_close(coll);
|
| -}
|
| -
|
| -/**
|
| -* Checking collation element validity.
|
| -*/
|
| -#define MAX_CODEPOINTS_TO_SHOW 10
|
| -static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
|
| - int i, lengthToUse = length;
|
| - if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
|
| - lengthToUse = MAX_CODEPOINTS_TO_SHOW;
|
| - }
|
| - for (i = 0; i < lengthToUse; ++i) {
|
| - int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
|
| - if (bytesWritten <= 0) {
|
| - break;
|
| - }
|
| - codepointText += bytesWritten;
|
| - }
|
| - if (i < length) {
|
| - sprintf(codepointText, " ...");
|
| - }
|
| -}
|
| -
|
| -static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
|
| - int length)
|
| -{
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - UCollationElements *iter = ucol_openElements(coll, codepoints, length,
|
| - &status);
|
| - UBool result = FALSE;
|
| - UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
|
| - const char * collLocale;
|
| -
|
| - if (U_FAILURE(status)) {
|
| - log_err("Error creating iterator for testing validity\n");
|
| - return FALSE;
|
| - }
|
| - collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
|
| - if (U_FAILURE(status) || collLocale==NULL) {
|
| - status = U_ZERO_ERROR;
|
| - collLocale = "?";
|
| - }
|
| -
|
| - for (;;) {
|
| - uint32_t ce = ucol_next(iter, &status);
|
| - uint32_t primary, p1, p2, secondary, tertiary;
|
| - if (ce == UCOL_NULLORDER) {
|
| - result = TRUE;
|
| - break;
|
| - }
|
| - if (ce == 0) {
|
| - continue;
|
| - }
|
| - if (ce == 0x02000202) {
|
| - /* special CE for merge-sort character */
|
| - if (*codepoints == 0xFFFE /* && length == 1 */) {
|
| - /*
|
| - * Note: We should check for length==1 but the token parser appears
|
| - * to give us trailing NUL characters.
|
| - * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
|
| - * rather than the internal collation rule parser
|
| - */
|
| - continue;
|
| - } else {
|
| - log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
|
| - (int)*codepoints, (int)length);
|
| - break;
|
| - }
|
| - }
|
| - primary = UCOL_PRIMARYORDER(ce);
|
| - p1 = primary >> 8;
|
| - p2 = primary & 0xFF;
|
| - secondary = UCOL_SECONDARYORDER(ce);
|
| - tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
|
| -
|
| - if (!isContinuation(ce)) {
|
| - if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
|
| - log_err("Empty CE %08lX except for case bits\n", (long)ce);
|
| - break;
|
| - }
|
| - if (p1 == 0) {
|
| - if (p2 != 0) {
|
| - log_err("Primary 00 xx in %08lX\n", (long)ce);
|
| - break;
|
| - }
|
| - primaryDone = TRUE;
|
| - } else {
|
| - if (p1 <= 2 || p1 >= 0xF0) {
|
| - /* Primary first bytes F0..FF are specials. */
|
| - log_err("Primary first byte of %08lX out of range\n", (long)ce);
|
| - break;
|
| - }
|
| - if (p2 == 0) {
|
| - primaryDone = TRUE;
|
| - } else {
|
| - if (p2 <= 3 || p2 >= 0xFF) {
|
| - /* Primary second bytes 03 and FF are sort key compression terminators. */
|
| - log_err("Primary second byte of %08lX out of range\n", (long)ce);
|
| - break;
|
| - }
|
| - primaryDone = FALSE;
|
| - }
|
| - }
|
| - if (secondary == 0) {
|
| - if (primary != 0) {
|
| - log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
|
| - break;
|
| - }
|
| - secondaryDone = TRUE;
|
| - } else {
|
| - if (secondary <= 2 ||
|
| - (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
|
| - ) {
|
| - /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
|
| - log_err("Secondary byte of %08lX out of range\n", (long)ce);
|
| - break;
|
| - }
|
| - secondaryDone = FALSE;
|
| - }
|
| - if (tertiary == 0) {
|
| - /* We know that ce != 0. */
|
| - log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
|
| - break;
|
| - }
|
| - if (tertiary <= 2) {
|
| - log_err("Tertiary byte of %08lX out of range\n", (long)ce);
|
| - break;
|
| - }
|
| - tertiaryDone = FALSE;
|
| - } else {
|
| - if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
|
| - log_err("Empty continuation %08lX\n", (long)ce);
|
| - break;
|
| - }
|
| - if (primaryDone && primary != 0) {
|
| - log_err("Primary was done but continues in %08lX\n", (long)ce);
|
| - break;
|
| - }
|
| - if (p1 == 0) {
|
| - if (p2 != 0) {
|
| - log_err("Primary 00 xx in %08lX\n", (long)ce);
|
| - break;
|
| - }
|
| - primaryDone = TRUE;
|
| - } else {
|
| - if (p1 <= 2) {
|
| - log_err("Primary first byte of %08lX out of range\n", (long)ce);
|
| - break;
|
| - }
|
| - if (p2 == 0) {
|
| - primaryDone = TRUE;
|
| - } else {
|
| - if (p2 <= 3) {
|
| - log_err("Primary second byte of %08lX out of range\n", (long)ce);
|
| - break;
|
| - }
|
| - }
|
| - }
|
| - if (secondaryDone && secondary != 0) {
|
| - log_err("Secondary was done but continues in %08lX\n", (long)ce);
|
| - break;
|
| - }
|
| - if (secondary == 0) {
|
| - secondaryDone = TRUE;
|
| - } else {
|
| - if (secondary <= 2) {
|
| - log_err("Secondary byte of %08lX out of range\n", (long)ce);
|
| - break;
|
| - }
|
| - }
|
| - if (tertiaryDone && tertiary != 0) {
|
| - log_err("Tertiary was done but continues in %08lX\n", (long)ce);
|
| - break;
|
| - }
|
| - if (tertiary == 0) {
|
| - tertiaryDone = TRUE;
|
| - } else if (tertiary <= 2) {
|
| - log_err("Tertiary byte of %08lX out of range\n", (long)ce);
|
| - break;
|
| - }
|
| - }
|
| - }
|
| - if (!result) {
|
| - char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
|
| - showCodepoints(codepoints, length, codepointText);
|
| - log_err("Locale: %s Code point string: %s\n", collLocale, codepointText);
|
| - }
|
| - ucol_closeElements(iter);
|
| - return result;
|
| -}
|
| -
|
| -static const UChar IMPORT[] = { 0x5B, 0x69, 0x6D, 0x70, 0x6F, 0x72, 0x74, 0 }; /* "[import" */
|
| -
|
| -static void TestCEValidity()
|
| -{
|
| - /* testing UCA collation elements */
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - /* en_US has no tailorings */
|
| - UCollator *coll = ucol_open("root", &status);
|
| - /* tailored locales */
|
| - char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
|
| - const char *loc;
|
| - FileStream *file = NULL;
|
| - char line[2048];
|
| - UChar codepoints[11];
|
| - int count = 0;
|
| - int maxCount = 0;
|
| - UChar contextCPs[3];
|
| - UChar32 c;
|
| - UParseError parseError;
|
| - if (U_FAILURE(status)) {
|
| - log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
|
| - return;
|
| - }
|
| - log_verbose("Testing UCA elements\n");
|
| - file = getFractionalUCA();
|
| - if (file == NULL) {
|
| - log_err("Fractional UCA data can not be opened\n");
|
| - return;
|
| - }
|
| -
|
| - while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
|
| - if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
|
| - line[0] == 0x000D || line[0] == '[') {
|
| - continue;
|
| - }
|
| -
|
| - getCodePoints(line, codepoints, contextCPs);
|
| - checkCEValidity(coll, codepoints, u_strlen(codepoints));
|
| - }
|
| -
|
| - log_verbose("Testing UCA elements for the whole range of unicode characters\n");
|
| - for (c = 0; c <= 0xffff; ++c) {
|
| - if (u_isdefined(c)) {
|
| - codepoints[0] = (UChar)c;
|
| - checkCEValidity(coll, codepoints, 1);
|
| - }
|
| - }
|
| - for (; c <= 0x10ffff; ++c) {
|
| - if (u_isdefined(c)) {
|
| - int32_t i = 0;
|
| - U16_APPEND_UNSAFE(codepoints, i, c);
|
| - checkCEValidity(coll, codepoints, i);
|
| - }
|
| - }
|
| -
|
| - ucol_close(coll);
|
| -
|
| - /* testing tailored collation elements */
|
| - log_verbose("Testing tailored elements\n");
|
| - if(getTestOption(QUICK_OPTION)) {
|
| - maxCount = sizeof(locale)/sizeof(locale[0]);
|
| - } else {
|
| - maxCount = uloc_countAvailable();
|
| - }
|
| - while (count < maxCount) {
|
| - const UChar *rules = NULL,
|
| - *current = NULL;
|
| - UChar *rulesCopy = NULL;
|
| - int32_t ruleLen = 0;
|
| -
|
| - uint32_t chOffset = 0;
|
| - uint32_t chLen = 0;
|
| - uint32_t exOffset = 0;
|
| - uint32_t exLen = 0;
|
| - uint32_t prefixOffset = 0;
|
| - uint32_t prefixLen = 0;
|
| - UBool startOfRules = TRUE;
|
| - UColOptionSet opts;
|
| -
|
| - UColTokenParser src;
|
| - uint32_t strength = 0;
|
| - uint16_t specs = 0;
|
| -
|
| - (void)specs; /* Suppress set but not used warnings. */
|
| - (void)strength;
|
| - (void)prefixLen;
|
| - (void)prefixOffset;
|
| - (void)exLen;
|
| - (void)exOffset;
|
| -
|
| - if(getTestOption(QUICK_OPTION)) {
|
| - loc = locale[count];
|
| - } else {
|
| - loc = uloc_getAvailable(count);
|
| - if(!hasCollationElements(loc)) {
|
| - count++;
|
| - continue;
|
| - }
|
| - }
|
| - status = U_ZERO_ERROR; // clear status from previous loop iteration
|
| -
|
| - uprv_memset(&src, 0, sizeof(UColTokenParser));
|
| -
|
| - log_verbose("Testing CEs for %s\n", loc);
|
| -
|
| - coll = ucol_open(loc, &status);
|
| - if (U_FAILURE(status)) {
|
| - log_err("%s collator creation failed with status %s\n", loc, u_errorName(status));
|
| - return;
|
| - }
|
| -
|
| - src.opts = &opts;
|
| - rules = ucol_getRules(coll, &ruleLen);
|
| -
|
| - /*
|
| - * We have not set up the UColTokenParser with a callback function
|
| - * to fetch [import] sub-rules,
|
| - * so skip testing tailorings that import others.
|
| - * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
|
| - * rather than the internal collation rule parser
|
| - */
|
| - if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
|
| - rulesCopy = (UChar *)uprv_malloc((ruleLen +
|
| - UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
|
| - uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
|
| - src.current = src.source = rulesCopy;
|
| - src.end = rulesCopy + ruleLen;
|
| - src.extraCurrent = src.end;
|
| - src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
| -
|
| - /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
|
| - the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
|
| - while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL && U_SUCCESS(status)) {
|
| - strength = src.parsedToken.strength;
|
| - chOffset = src.parsedToken.charsOffset;
|
| - chLen = src.parsedToken.charsLen;
|
| - exOffset = src.parsedToken.extensionOffset;
|
| - exLen = src.parsedToken.extensionLen;
|
| - prefixOffset = src.parsedToken.prefixOffset;
|
| - prefixLen = src.parsedToken.prefixLen;
|
| - specs = src.parsedToken.flags;
|
| -
|
| - startOfRules = FALSE;
|
| - uprv_memcpy(codepoints, src.source + chOffset,
|
| - chLen * sizeof(UChar));
|
| - codepoints[chLen] = 0;
|
| - checkCEValidity(coll, codepoints, chLen);
|
| - }
|
| - if (U_FAILURE(status)) {
|
| - log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", loc, u_errorName(status));
|
| - }
|
| - uprv_free(src.source);
|
| - uprv_free(src.reorderCodes);
|
| - }
|
| -
|
| - ucol_close(coll);
|
| - count ++;
|
| - }
|
| - T_FileStream_close(file);
|
| -}
|
| -
|
| -static void printSortKeyError(const UChar *codepoints, int length,
|
| - uint8_t *sortkey, int sklen)
|
| -{
|
| - int count = 0;
|
| - log_err("Sortkey not valid for ");
|
| - while (length > 0) {
|
| - log_err("0x%04x ", *codepoints);
|
| - length --;
|
| - codepoints ++;
|
| - }
|
| - log_err("\nSortkey : ");
|
| - while (count < sklen) {
|
| - log_err("0x%02x ", sortkey[count]);
|
| - count ++;
|
| - }
|
| - log_err("\n");
|
| -}
|
| -
|
| -/**
|
| -* Checking sort key validity for all levels
|
| -*/
|
| -static UBool checkSortKeyValidity(UCollator *coll,
|
| - const UChar *codepoints,
|
| - int length)
|
| -{
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
|
| - UCOL_TERTIARY, UCOL_QUATERNARY,
|
| - UCOL_IDENTICAL};
|
| - int strengthlen = 5;
|
| - int strengthIndex = 0;
|
| - int caselevel = 0;
|
| -
|
| - while (caselevel < 1) {
|
| - if (caselevel == 0) {
|
| - ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
|
| - }
|
| - else {
|
| - ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
|
| - }
|
| -
|
| - while (strengthIndex < strengthlen) {
|
| - int count01 = 0;
|
| - uint32_t count = 0;
|
| - uint8_t sortkey[128];
|
| - uint32_t sklen;
|
| -
|
| - ucol_setStrength(coll, strength[strengthIndex]);
|
| - sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
|
| - while (sortkey[count] != 0) {
|
| - if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
|
| - printSortKeyError(codepoints, length, sortkey, sklen);
|
| - return FALSE;
|
| - }
|
| - if (sortkey[count] == 1) {
|
| - count01 ++;
|
| - }
|
| - count ++;
|
| - }
|
| -
|
| - if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
|
| - printSortKeyError(codepoints, length, sortkey, sklen);
|
| - return FALSE;
|
| - }
|
| - strengthIndex ++;
|
| - }
|
| - caselevel ++;
|
| - }
|
| - return TRUE;
|
| -}
|
| -
|
| -static void TestSortKeyValidity(void)
|
| -{
|
| - /* testing UCA collation elements */
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - /* en_US has no tailorings */
|
| - UCollator *coll = ucol_open("en_US", &status);
|
| - /* tailored locales */
|
| - char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
|
| - FileStream *file = NULL;
|
| - char line[2048];
|
| - UChar codepoints[10];
|
| - int count = 0;
|
| - UChar contextCPs[5];
|
| - UParseError parseError;
|
| - if (U_FAILURE(status)) {
|
| - log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
|
| - return;
|
| - }
|
| - log_verbose("Testing UCA elements\n");
|
| - file = getFractionalUCA();
|
| - if (file == NULL) {
|
| - log_err("Fractional UCA data can not be opened\n");
|
| - return;
|
| - }
|
| -
|
| - while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
|
| - if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
|
| - line[0] == 0x000D || line[0] == '[') {
|
| - continue;
|
| - }
|
| -
|
| - getCodePoints(line, codepoints, contextCPs);
|
| - if(codepoints[0] == 0xFFFE) {
|
| - /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
|
| - continue;
|
| - }
|
| - checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
|
| - }
|
| -
|
| - log_verbose("Testing UCA elements for the whole range of unicode characters\n");
|
| - codepoints[0] = 0;
|
| -
|
| - while (codepoints[0] < 0xFFFF) {
|
| - if (u_isdefined((UChar32)codepoints[0])) {
|
| - checkSortKeyValidity(coll, codepoints, 1);
|
| - }
|
| - codepoints[0] ++;
|
| - }
|
| -
|
| - ucol_close(coll);
|
| -
|
| - /* testing tailored collation elements */
|
| - log_verbose("Testing tailored elements\n");
|
| - while (count < 5) {
|
| - const UChar *rules = NULL,
|
| - *current = NULL;
|
| - UChar *rulesCopy = NULL;
|
| - int32_t ruleLen = 0;
|
| -
|
| - uint32_t chOffset = 0;
|
| - uint32_t chLen = 0;
|
| - uint32_t exOffset = 0;
|
| - uint32_t exLen = 0;
|
| - uint32_t prefixOffset = 0;
|
| - uint32_t prefixLen = 0;
|
| - UBool startOfRules = TRUE;
|
| - UColOptionSet opts;
|
| -
|
| - UColTokenParser src;
|
| - uint32_t strength = 0;
|
| - uint16_t specs = 0;
|
| - status = U_ZERO_ERROR; // clear status from previous loop iteration
|
| -
|
| - (void)specs;
|
| - (void)strength;
|
| - (void)prefixLen;
|
| - (void)prefixOffset;
|
| - (void)exLen;
|
| - (void)exOffset;
|
| -
|
| - uprv_memset(&src, 0, sizeof(UColTokenParser));
|
| -
|
| - coll = ucol_open(locale[count], &status);
|
| - if (U_FAILURE(status)) {
|
| - log_err("%s collator creation failed with status %s\n", locale[count], u_errorName(status));
|
| - return;
|
| - }
|
| -
|
| - src.opts = &opts;
|
| - rules = ucol_getRules(coll, &ruleLen);
|
| -
|
| - /*
|
| - * We have not set up the UColTokenParser with a callback function
|
| - * to fetch [import] sub-rules,
|
| - * so skip testing tailorings that import others.
|
| - * TODO: Ticket #8047: Change TestSortKeyValidity to use ucol_getTailoredSet()
|
| - * rather than the internal collation rule parser
|
| - */
|
| - if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
|
| - rulesCopy = (UChar *)uprv_malloc((ruleLen +
|
| - UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
|
| - uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
|
| - src.current = src.source = rulesCopy;
|
| - src.end = rulesCopy + ruleLen;
|
| - src.extraCurrent = src.end;
|
| - src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
| -
|
| - /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
|
| - the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
|
| - while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL && U_SUCCESS(status)) {
|
| - strength = src.parsedToken.strength;
|
| - chOffset = src.parsedToken.charsOffset;
|
| - chLen = src.parsedToken.charsLen;
|
| - exOffset = src.parsedToken.extensionOffset;
|
| - exLen = src.parsedToken.extensionLen;
|
| - prefixOffset = src.parsedToken.prefixOffset;
|
| - prefixLen = src.parsedToken.prefixLen;
|
| - specs = src.parsedToken.flags;
|
| -
|
| - startOfRules = FALSE;
|
| - uprv_memcpy(codepoints, src.source + chOffset,
|
| - chLen * sizeof(UChar));
|
| - codepoints[chLen] = 0;
|
| - if(codepoints[0] == 0xFFFE) {
|
| - /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
|
| - continue;
|
| - }
|
| - checkSortKeyValidity(coll, codepoints, chLen);
|
| - }
|
| - if (U_FAILURE(status)) {
|
| - log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", locale[count], u_errorName(status));
|
| - }
|
| - uprv_free(src.source);
|
| - uprv_free(src.reorderCodes);
|
| - }
|
| -
|
| - ucol_close(coll);
|
| - count ++;
|
| - }
|
| - T_FileStream_close(file);
|
| -}
|
| -
|
| /**
|
| * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
|
| * normalization on AND jamo tailoring, among other things.
|
| +*
|
| +* Note: This test is sensitive to changes in the root collator,
|
| +* for example whether the ae-ligature maps to three CEs (as in the DUCET)
|
| +* or to two CEs (as in the CLDR 24 FractionalUCA.txt).
|
| +* It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
|
| +* For example, the DUCET's artificial secondary CE in the ae-ligature
|
| +* may map to two 32-bit iterator CEs (as it did until ICU 52).
|
| */
|
| static const UChar tsceText[] = { /* Nothing in here should be ignorable */
|
| 0x0020, 0xAC00, /* simple LV Hangul */
|
| @@ -2089,7 +1145,7 @@ static const int32_t rootStandardOffsets[] = {
|
| 12, 13,14,15,
|
| 16, 17,18,19,
|
| 20, 21,22,23,
|
| - 24, 25,26,26,26,
|
| + 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
|
| 26, 27,28,28,
|
| 28,
|
| 29
|
| @@ -2105,7 +1161,7 @@ static const int32_t rootSearchOffsets[] = {
|
| 12, 13,14,15,
|
| 16, 17,18,19,20,
|
| 20, 21,22,22,23,23,23,24,
|
| - 24, 25,26,26,26,
|
| + 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
|
| 26, 27,28,28,
|
| 28,
|
| 29
|
| @@ -2142,6 +1198,7 @@ static void TestSearchCollatorElements(void)
|
| do {
|
| offset = ucol_getOffset(uce);
|
| element = ucol_next(uce, &status);
|
| + log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr->locale, offset, element);
|
| if ( element == 0 ) {
|
| log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
|
| }
|
|
|