source/test/cintltst/citertst.c - Issue 845603002: Update ICU to 54.1 step 1

Unified Diff: source/test/cintltst/citertst.c

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unused directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/test/cintltst/citertst.c

diff --git a/source/test/cintltst/citertst.c b/source/test/cintltst/citertst.c

index e8e2cff0ff36fdef49989e77bd55b52aacfc80fd..c49487ab8d18cc0c613ad33058cfa9e35bae447a 100644

--- a/source/test/cintltst/citertst.c

+++ b/source/test/cintltst/citertst.c

@@ -1,6 +1,6 @@

/********************************************************************

* COPYRIGHT:

********************************************************************/

/********************************************************************************

@@ -35,7 +35,6 @@

#include "filestrm.h"

#include "cstring.h"

#include "ucol_imp.h"

-#include "ucol_tok.h"

#include "uparse.h"

#include <stdio.h>

@@ -54,11 +53,7 @@ void addCollIterTest(TestNode** root)

addTest(root, &TestBug672, "tscoll/citertst/TestBug672");

addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");

addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");

- addTest(root, &TestCEs, "tscoll/citertst/TestCEs");

addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");

- addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");

- addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");

- addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");

addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");

}

@@ -763,15 +758,10 @@ static void TestSetText()

/* Now set it to point to a null string with fake length*/

ucol_setText(iter2, NULL, 2, &status);

- if (U_FAILURE(status))

+ if (status != U_ILLEGAL_ARGUMENT_ERROR)

{

- log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));

- }

- else

- {

- if (ucol_next(iter2, &status) != UCOL_NULLORDER) {

- log_err("iter2 with null text expected to return UCOL_NULLORDER\n");

- }

+ log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",

+ myErrorName(status));

}

ucol_closeElements(iter2);

@@ -1011,353 +1001,6 @@ static void TestSmallBuffer()

}

/**

-* Sniplets of code from genuca

-*/

-static int32_t hex2num(char hex) {

- if(hex>='0' && hex <='9') {

- return hex-'0';

- } else if(hex>='a' && hex<='f') {

- return hex-'a'+10;

- } else if(hex>='A' && hex<='F') {

- return hex-'A'+10;

- } else {

- return 0;

- }

-/**

-* Getting codepoints from a string

-* @param str character string contain codepoints seperated by space and ended

-* by a semicolon

-* @param codepoints array for storage, assuming size > 5

-* @return position at the end of the codepoint section

-*/

-static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {

- UErrorCode errorCode = U_ZERO_ERROR;

- char *semi = uprv_strchr(str, ';');

- char *pipe = uprv_strchr(str, '|');

- char *s;

- *codepoints = 0;

- *contextCPs = 0;

- if(semi == NULL) {

- log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);

- return str;

- }

- if(pipe != NULL) {

- int32_t contextLength;

- *pipe = 0;

- contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);

- *pipe = '|';

- if(U_FAILURE(errorCode)) {

- log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);

- return str;

- }

- /* prepend the precontext string to the codepoints */

- u_memcpy(codepoints, contextCPs, contextLength);

- codepoints += contextLength;

- /* start of the code point string */

- s = pipe + 1;

- } else {

- s = str;

- }

- u_parseString(s, codepoints, 99, NULL, &errorCode);

- if(U_FAILURE(errorCode)) {

- log_err("error parsing code point string from FractionalUCA.txt %s\n", str);

- return str;

- }

- return semi + 1;

-/**

-* Sniplets of code from genuca

-*/

-static int32_t

-readElement(char **from, char *to, char separator, UErrorCode *status)

- if (U_SUCCESS(*status)) {

- char buffer[1024];

- int32_t i = 0;

- while (**from != separator) {

- if (**from != ' ') {

- *(buffer+i++) = **from;

- }

- (*from)++;

- }

- (*from)++;

- *(buffer + i) = 0;

- strcpy(to, buffer);

- return i/2;

- }

- return 0;

-/**

-* Sniplets of code from genuca

-*/

-static uint32_t

-getSingleCEValue(char *primary, char *secondary, char *tertiary,

- UErrorCode *status)

- if (U_SUCCESS(*status)) {

- uint32_t value = 0;

- char primsave = '\0';

- char secsave = '\0';

- char tersave = '\0';

- char *primend = primary+4;

- char *secend = secondary+2;

- char *terend = tertiary+2;

- uint32_t primvalue;

- uint32_t secvalue;

- uint32_t tervalue;

- if (uprv_strlen(primary) > 4) {

- primsave = *primend;

- *primend = '\0';

- }

- if (uprv_strlen(secondary) > 2) {

- secsave = *secend;

- *secend = '\0';

- }

- if (uprv_strlen(tertiary) > 2) {

- tersave = *terend;

- *terend = '\0';

- }

- primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;

- secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;

- tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;

- if(primvalue <= 0xFF) {

- primvalue <<= 8;

- }

- value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)

- | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)

- | (tervalue & UCOL_TERTIARYORDERMASK);

- if(primsave!='\0') {

- *primend = primsave;

- }

- if(secsave!='\0') {

- *secend = secsave;

- }

- if(tersave!='\0') {

- *terend = tersave;

- }

- return value;

- }

- return 0;

-/**

-* Getting collation elements generated from a string

-* @param str character string contain collation elements contained in [] and

-* seperated by space

-* @param ce array for storage, assuming size > 20

-* @param status error status

-* @return position at the end of the codepoint section

-*/

-static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {

- char *pStartCP = uprv_strchr(str, '[');

- int count = 0;

- char *pEndCP;

- char primary[100];

- char secondary[100];

- char tertiary[100];

- while (*pStartCP == '[') {

- uint32_t primarycount = 0;

- uint32_t secondarycount = 0;

- uint32_t tertiarycount = 0;

- uint32_t CEi = 1;

- pEndCP = strchr(pStartCP, ']');

- if(pEndCP == NULL) {

- break;

- }

- pStartCP ++;

- primarycount = readElement(&pStartCP, primary, ',', status);

- secondarycount = readElement(&pStartCP, secondary, ',', status);

- tertiarycount = readElement(&pStartCP, tertiary, ']', status);

- /* I want to get the CEs entered right here, including continuation */

- ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);

- if (U_FAILURE(*status)) {

- break;

- }

- while (2 * CEi < primarycount || CEi < secondarycount ||

- CEi < tertiarycount) {

- uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */

- if (2 * CEi < primarycount) {

- value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);

- value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);

- }

- if (2 * CEi + 1 < primarycount) {

- value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);

- value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);

- }

- if (CEi < secondarycount) {

- value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);

- value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);

- }

- if (CEi < tertiarycount) {

- value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);

- value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);

- }

- CEi ++;

- ces[count ++] = value;

- }

- pStartCP = pEndCP + 1;

- }

- ces[count] = 0;

- return pStartCP;

-/**

-* Getting the FractionalUCA.txt file stream

-*/

-static FileStream * getFractionalUCA(void)

- char newPath[256];

- char backupPath[256];

- FileStream *result = NULL;

- /* Look inside ICU_DATA first */

- uprv_strcpy(newPath, ctest_dataSrcDir());

- uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );

- uprv_strcat(newPath, "FractionalUCA.txt");

- /* As a fallback, try to guess where the source data was located

- * at the time ICU was built, and look there.

- */

-#if defined (U_TOPSRCDIR)

- strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");

-#else

- {

- UErrorCode errorCode = U_ZERO_ERROR;

- strcpy(backupPath, loadTestData(&errorCode));

- strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");

- }

-#endif

- strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");

- result = T_FileStream_open(newPath, "rb");

- if (result == NULL) {

- result = T_FileStream_open(backupPath, "rb");

- if (result == NULL) {

- log_err("Failed to open either %s or %s\n", newPath, backupPath);

- }

- return result;

-/**

-* Testing the CEs returned by the iterator

-*/

-static void TestCEs() {

- FileStream *file = NULL;

- char line[2048];

- char *str;

- UChar codepoints[10];

- uint32_t ces[20];

- UErrorCode status = U_ZERO_ERROR;

- UCollator *coll = ucol_open("", &status);

- uint32_t lineNo = 0;

- UChar contextCPs[5];

- if (U_FAILURE(status)) {

- log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));

- return;

- }

- file = getFractionalUCA();

- if (file == NULL) {

- log_err("*** unable to open input FractionalUCA.txt file ***\n");

- return;

- }

- while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {

- int count = 0;

- UCollationElements *iter;

- int32_t preContextCeLen=0;

- lineNo++;

- /* skip this line if it is empty or a comment or is a return value

- or start of some variable section */

- if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||

- line[0] == 0x000D || line[0] == '[') {

- continue;

- }

- str = getCodePoints(line, codepoints, contextCPs);

- /* these are 'fake' codepoints in the fractional UCA, and are used just

- * for positioning of indirect values. They should not go through this

- * test.

- */

- if(*codepoints == 0xFDD0) {

- continue;

- }

- if (*contextCPs != 0) {

- iter = ucol_openElements(coll, contextCPs, -1, &status);

- if (U_FAILURE(status)) {

- log_err("Error in opening collation elements\n");

- break;

- }

- while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {

- preContextCeLen++;

- }

- ucol_closeElements(iter);

- }

- getCEs(str, ces+preContextCeLen, &status);

- if (U_FAILURE(status)) {

- log_err("Error in parsing collation elements in FractionalUCA.txt\n");

- break;

- }

- iter = ucol_openElements(coll, codepoints, -1, &status);

- if (U_FAILURE(status)) {

- log_err("Error in opening collation elements\n");

- break;

- }

- for (;;) {

- uint32_t ce = (uint32_t)ucol_next(iter, &status);

- if (ce == 0xFFFFFFFF) {

- ce = 0;

- }

- /* we now unconditionally reorder Thai/Lao prevowels, so this

- * test would fail if we don't skip here.

- */

- if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {

- continue;

- }

- if (ce != ces[count] || U_FAILURE(status)) {

- log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");

- break;

- }

- if (ces[count] == 0) {

- break;

- }

- count ++;

- }

- ucol_closeElements(iter);

- }

- T_FileStream_close(file);

- ucol_close(coll);

-/**

* Testing the discontigous contractions

static void TestDiscontiguos() {

@@ -1467,603 +1110,16 @@ static void TestDiscontiguos() {

ucol_close(coll);

}

-static void TestCEBufferOverflow()

- UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];

- UErrorCode status = U_ZERO_ERROR;

- UChar rule[10];

- UCollator *coll;

- UCollationElements *iter;

- u_uastrcpy(rule, "&z < AB");

- coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);

- if (U_FAILURE(status)) {

- log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));

- return;

- }

- /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic

- test. this will cause an overflow in getPrev */

- str[0] = 0x0041; /* 'A' */

- /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/

- uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);

- str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */

- iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,

- &status);

- if (ucol_previous(iter, &status) == UCOL_NULLORDER ||

- status == U_BUFFER_OVERFLOW_ERROR) {

- log_err("CE buffer should not overflow with long string of trail surrogates\n");

- }

- ucol_closeElements(iter);

- ucol_close(coll);

-/**

-* Checking collation element validity.

-*/

-#define MAX_CODEPOINTS_TO_SHOW 10

-static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {

- int i, lengthToUse = length;

- if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {

- lengthToUse = MAX_CODEPOINTS_TO_SHOW;

- }

- for (i = 0; i < lengthToUse; ++i) {

- int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);

- if (bytesWritten <= 0) {

- break;

- }

- codepointText += bytesWritten;

- }

- if (i < length) {

- sprintf(codepointText, " ...");

- }

-static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,

- int length)

- UErrorCode status = U_ZERO_ERROR;

- UCollationElements *iter = ucol_openElements(coll, codepoints, length,

- &status);

- UBool result = FALSE;

- UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;

- const char * collLocale;

- if (U_FAILURE(status)) {

- log_err("Error creating iterator for testing validity\n");

- return FALSE;

- }

- collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);

- if (U_FAILURE(status) || collLocale==NULL) {

- status = U_ZERO_ERROR;

- collLocale = "?";

- }

- for (;;) {

- uint32_t ce = ucol_next(iter, &status);

- uint32_t primary, p1, p2, secondary, tertiary;

- if (ce == UCOL_NULLORDER) {

- result = TRUE;

- break;

- }

- if (ce == 0) {

- continue;

- }

- if (ce == 0x02000202) {

- /* special CE for merge-sort character */

- if (*codepoints == 0xFFFE /* && length == 1 */) {

- /*

- * Note: We should check for length==1 but the token parser appears

- * to give us trailing NUL characters.

- * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()

- * rather than the internal collation rule parser

- */

- continue;

- } else {

- log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",

- (int)*codepoints, (int)length);

- break;

- }

- primary = UCOL_PRIMARYORDER(ce);

- p1 = primary >> 8;

- p2 = primary & 0xFF;

- secondary = UCOL_SECONDARYORDER(ce);

- tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;

- if (!isContinuation(ce)) {

- if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {

- log_err("Empty CE %08lX except for case bits\n", (long)ce);

- break;

- }

- if (p1 == 0) {

- if (p2 != 0) {

- log_err("Primary 00 xx in %08lX\n", (long)ce);

- break;

- }

- primaryDone = TRUE;

- } else {

- if (p1 <= 2 || p1 >= 0xF0) {

- /* Primary first bytes F0..FF are specials. */

- log_err("Primary first byte of %08lX out of range\n", (long)ce);

- break;

- }

- if (p2 == 0) {

- primaryDone = TRUE;

- } else {

- if (p2 <= 3 || p2 >= 0xFF) {

- /* Primary second bytes 03 and FF are sort key compression terminators. */

- log_err("Primary second byte of %08lX out of range\n", (long)ce);

- break;

- }

- primaryDone = FALSE;

- }

- if (secondary == 0) {

- if (primary != 0) {

- log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);

- break;

- }

- secondaryDone = TRUE;

- } else {

- if (secondary <= 2 ||

- (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))

- ) {

- /* Secondary first bytes common+1..+0x80 are used for sort key compression. */

- log_err("Secondary byte of %08lX out of range\n", (long)ce);

- break;

- }

- secondaryDone = FALSE;

- }

- if (tertiary == 0) {

- /* We know that ce != 0. */

- log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);

- break;

- }

- if (tertiary <= 2) {

- log_err("Tertiary byte of %08lX out of range\n", (long)ce);

- break;

- }

- tertiaryDone = FALSE;

- } else {

- if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {

- log_err("Empty continuation %08lX\n", (long)ce);

- break;

- }

- if (primaryDone && primary != 0) {

- log_err("Primary was done but continues in %08lX\n", (long)ce);

- break;

- }

- if (p1 == 0) {

- if (p2 != 0) {

- log_err("Primary 00 xx in %08lX\n", (long)ce);

- break;

- }

- primaryDone = TRUE;

- } else {

- if (p1 <= 2) {

- log_err("Primary first byte of %08lX out of range\n", (long)ce);

- break;

- }

- if (p2 == 0) {

- primaryDone = TRUE;

- } else {

- if (p2 <= 3) {

- log_err("Primary second byte of %08lX out of range\n", (long)ce);

- break;

- }

- if (secondaryDone && secondary != 0) {

- log_err("Secondary was done but continues in %08lX\n", (long)ce);

- break;

- }

- if (secondary == 0) {

- secondaryDone = TRUE;

- } else {

- if (secondary <= 2) {

- log_err("Secondary byte of %08lX out of range\n", (long)ce);

- break;

- }

- if (tertiaryDone && tertiary != 0) {

- log_err("Tertiary was done but continues in %08lX\n", (long)ce);

- break;

- }

- if (tertiary == 0) {

- tertiaryDone = TRUE;

- } else if (tertiary <= 2) {

- log_err("Tertiary byte of %08lX out of range\n", (long)ce);

- break;

- }

- if (!result) {

- char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];

- showCodepoints(codepoints, length, codepointText);

- log_err("Locale: %s Code point string: %s\n", collLocale, codepointText);

- }

- ucol_closeElements(iter);

- return result;

-static const UChar IMPORT[] = { 0x5B, 0x69, 0x6D, 0x70, 0x6F, 0x72, 0x74, 0 }; /* "[import" */

-static void TestCEValidity()

- /* testing UCA collation elements */

- UErrorCode status = U_ZERO_ERROR;

- /* en_US has no tailorings */

- UCollator *coll = ucol_open("root", &status);

- /* tailored locales */

- char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};

- const char *loc;

- FileStream *file = NULL;

- char line[2048];

- UChar codepoints[11];

- int count = 0;

- int maxCount = 0;

- UChar contextCPs[3];

- UChar32 c;

- UParseError parseError;

- if (U_FAILURE(status)) {

- log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));

- return;

- }

- log_verbose("Testing UCA elements\n");

- file = getFractionalUCA();

- if (file == NULL) {

- log_err("Fractional UCA data can not be opened\n");

- return;

- }

- while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {

- if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||

- line[0] == 0x000D || line[0] == '[') {

- continue;

- }

- getCodePoints(line, codepoints, contextCPs);

- checkCEValidity(coll, codepoints, u_strlen(codepoints));

- }

- log_verbose("Testing UCA elements for the whole range of unicode characters\n");

- for (c = 0; c <= 0xffff; ++c) {

- if (u_isdefined(c)) {

- codepoints[0] = (UChar)c;

- checkCEValidity(coll, codepoints, 1);

- }

- for (; c <= 0x10ffff; ++c) {

- if (u_isdefined(c)) {

- int32_t i = 0;

- U16_APPEND_UNSAFE(codepoints, i, c);

- checkCEValidity(coll, codepoints, i);

- }

- ucol_close(coll);

- /* testing tailored collation elements */

- log_verbose("Testing tailored elements\n");

- if(getTestOption(QUICK_OPTION)) {

- maxCount = sizeof(locale)/sizeof(locale[0]);

- } else {

- maxCount = uloc_countAvailable();

- }

- while (count < maxCount) {

- const UChar *rules = NULL,

- *current = NULL;

- UChar *rulesCopy = NULL;

- int32_t ruleLen = 0;

- uint32_t chOffset = 0;

- uint32_t chLen = 0;

- uint32_t exOffset = 0;

- uint32_t exLen = 0;

- uint32_t prefixOffset = 0;

- uint32_t prefixLen = 0;

- UBool startOfRules = TRUE;

- UColOptionSet opts;

- UColTokenParser src;

- uint32_t strength = 0;

- uint16_t specs = 0;

- (void)specs; /* Suppress set but not used warnings. */

- (void)strength;

- (void)prefixLen;

- (void)prefixOffset;

- (void)exLen;

- (void)exOffset;

- if(getTestOption(QUICK_OPTION)) {

- loc = locale[count];

- } else {

- loc = uloc_getAvailable(count);

- if(!hasCollationElements(loc)) {

- count++;

- continue;

- }

- status = U_ZERO_ERROR; // clear status from previous loop iteration

- uprv_memset(&src, 0, sizeof(UColTokenParser));

- log_verbose("Testing CEs for %s\n", loc);

- coll = ucol_open(loc, &status);

- if (U_FAILURE(status)) {

- log_err("%s collator creation failed with status %s\n", loc, u_errorName(status));

- return;

- }

- src.opts = &opts;

- rules = ucol_getRules(coll, &ruleLen);

- /*

- * We have not set up the UColTokenParser with a callback function

- * to fetch [import] sub-rules,

- * so skip testing tailorings that import others.

- * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()

- * rather than the internal collation rule parser

- */

- if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {

- rulesCopy = (UChar *)uprv_malloc((ruleLen +

- UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));

- uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));

- src.current = src.source = rulesCopy;

- src.end = rulesCopy + ruleLen;

- src.extraCurrent = src.end;

- src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to

- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */

- while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL && U_SUCCESS(status)) {

- strength = src.parsedToken.strength;

- chOffset = src.parsedToken.charsOffset;

- chLen = src.parsedToken.charsLen;

- exOffset = src.parsedToken.extensionOffset;

- exLen = src.parsedToken.extensionLen;

- prefixOffset = src.parsedToken.prefixOffset;

- prefixLen = src.parsedToken.prefixLen;

- specs = src.parsedToken.flags;

- startOfRules = FALSE;

- uprv_memcpy(codepoints, src.source + chOffset,

- chLen * sizeof(UChar));

- codepoints[chLen] = 0;

- checkCEValidity(coll, codepoints, chLen);

- }

- if (U_FAILURE(status)) {

- log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", loc, u_errorName(status));

- }

- uprv_free(src.source);

- uprv_free(src.reorderCodes);

- }

- ucol_close(coll);

- count ++;

- }

- T_FileStream_close(file);

-static void printSortKeyError(const UChar *codepoints, int length,

- uint8_t *sortkey, int sklen)

- int count = 0;

- log_err("Sortkey not valid for ");

- while (length > 0) {

- log_err("0x%04x ", *codepoints);

- length --;

- codepoints ++;

- }

- log_err("\nSortkey : ");

- while (count < sklen) {

- log_err("0x%02x ", sortkey[count]);

- count ++;

- }

- log_err("\n");

-/**

-* Checking sort key validity for all levels

-*/

-static UBool checkSortKeyValidity(UCollator *coll,

- const UChar *codepoints,

- int length)

- UErrorCode status = U_ZERO_ERROR;

- UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,

- UCOL_TERTIARY, UCOL_QUATERNARY,

- UCOL_IDENTICAL};

- int strengthlen = 5;

- int strengthIndex = 0;

- int caselevel = 0;

- while (caselevel < 1) {

- if (caselevel == 0) {

- ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);

- }

- else {

- ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);

- }

- while (strengthIndex < strengthlen) {

- int count01 = 0;

- uint32_t count = 0;

- uint8_t sortkey[128];

- uint32_t sklen;

- ucol_setStrength(coll, strength[strengthIndex]);

- sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);

- while (sortkey[count] != 0) {

- if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {

- printSortKeyError(codepoints, length, sortkey, sklen);

- return FALSE;

- }

- if (sortkey[count] == 1) {

- count01 ++;

- }

- count ++;

- }

- if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {

- printSortKeyError(codepoints, length, sortkey, sklen);

- return FALSE;

- }

- strengthIndex ++;

- }

- caselevel ++;

- }

- return TRUE;

-static void TestSortKeyValidity(void)

- /* testing UCA collation elements */

- UErrorCode status = U_ZERO_ERROR;

- /* en_US has no tailorings */

- UCollator *coll = ucol_open("en_US", &status);

- /* tailored locales */

- char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};

- FileStream *file = NULL;

- char line[2048];

- UChar codepoints[10];

- int count = 0;

- UChar contextCPs[5];

- UParseError parseError;

- if (U_FAILURE(status)) {

- log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));

- return;

- }

- log_verbose("Testing UCA elements\n");

- file = getFractionalUCA();

- if (file == NULL) {

- log_err("Fractional UCA data can not be opened\n");

- return;

- }

- while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {

- if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||

- line[0] == 0x000D || line[0] == '[') {

- continue;

- }

- getCodePoints(line, codepoints, contextCPs);

- if(codepoints[0] == 0xFFFE) {

- /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */

- continue;

- }

- checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));

- }

- log_verbose("Testing UCA elements for the whole range of unicode characters\n");

- codepoints[0] = 0;

- while (codepoints[0] < 0xFFFF) {

- if (u_isdefined((UChar32)codepoints[0])) {

- checkSortKeyValidity(coll, codepoints, 1);

- }

- codepoints[0] ++;

- }

- ucol_close(coll);

- /* testing tailored collation elements */

- log_verbose("Testing tailored elements\n");

- while (count < 5) {

- const UChar *rules = NULL,

- *current = NULL;

- UChar *rulesCopy = NULL;

- int32_t ruleLen = 0;

- uint32_t chOffset = 0;

- uint32_t chLen = 0;

- uint32_t exOffset = 0;

- uint32_t exLen = 0;

- uint32_t prefixOffset = 0;

- uint32_t prefixLen = 0;

- UBool startOfRules = TRUE;

- UColOptionSet opts;

- UColTokenParser src;

- uint32_t strength = 0;

- uint16_t specs = 0;

- status = U_ZERO_ERROR; // clear status from previous loop iteration

- (void)specs;

- (void)strength;

- (void)prefixLen;

- (void)prefixOffset;

- (void)exLen;

- (void)exOffset;

- uprv_memset(&src, 0, sizeof(UColTokenParser));

- coll = ucol_open(locale[count], &status);

- if (U_FAILURE(status)) {

- log_err("%s collator creation failed with status %s\n", locale[count], u_errorName(status));

- return;

- }

- src.opts = &opts;

- rules = ucol_getRules(coll, &ruleLen);

- /*

- * We have not set up the UColTokenParser with a callback function

- * to fetch [import] sub-rules,

- * so skip testing tailorings that import others.

- * TODO: Ticket #8047: Change TestSortKeyValidity to use ucol_getTailoredSet()

- * rather than the internal collation rule parser

- */

- if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {

- rulesCopy = (UChar *)uprv_malloc((ruleLen +

- UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));

- uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));

- src.current = src.source = rulesCopy;

- src.end = rulesCopy + ruleLen;

- src.extraCurrent = src.end;

- src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to

- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */

- while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL && U_SUCCESS(status)) {

- strength = src.parsedToken.strength;

- chOffset = src.parsedToken.charsOffset;

- chLen = src.parsedToken.charsLen;

- exOffset = src.parsedToken.extensionOffset;

- exLen = src.parsedToken.extensionLen;

- prefixOffset = src.parsedToken.prefixOffset;

- prefixLen = src.parsedToken.prefixLen;

- specs = src.parsedToken.flags;

- startOfRules = FALSE;

- uprv_memcpy(codepoints, src.source + chOffset,

- chLen * sizeof(UChar));

- codepoints[chLen] = 0;

- if(codepoints[0] == 0xFFFE) {

- /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */

- continue;

- }

- checkSortKeyValidity(coll, codepoints, chLen);

- }

- if (U_FAILURE(status)) {

- log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", locale[count], u_errorName(status));

- }

- uprv_free(src.source);

- uprv_free(src.reorderCodes);

- }

- ucol_close(coll);

- count ++;

- }

- T_FileStream_close(file);

/**

* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with

* normalization on AND jamo tailoring, among other things.

+* Note: This test is sensitive to changes of the root collator,

+* for example whether the ae-ligature maps to three CEs (as in the DUCET)

+* or to two CEs (as in the CLDR 24 FractionalUCA.txt).

+* It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.

+* For example, the DUCET's artificial secondary CE in the ae-ligature

+* may map to two 32-bit iterator CEs (as it did until ICU 52).

static const UChar tsceText[] = { /* Nothing in here should be ignorable */

0x0020, 0xAC00, /* simple LV Hangul */

@@ -2089,7 +1145,7 @@ static const int32_t rootStandardOffsets[] = {

12, 13,14,15,

16, 17,18,19,

20, 21,22,23,

- 24, 25,26,26,26,

+ 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */

26, 27,28,28,

28,

@@ -2105,7 +1161,7 @@ static const int32_t rootSearchOffsets[] = {

12, 13,14,15,

16, 17,18,19,20,

20, 21,22,22,23,23,23,24,

- 24, 25,26,26,26,

+ 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */

26, 27,28,28,

28,

@@ -2142,6 +1198,7 @@ static void TestSearchCollatorElements(void)

do {

offset = ucol_getOffset(uce);

element = ucol_next(uce, &status);

+ log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr->locale, offset, element);

if ( element == 0 ) {

log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );

}

« no previous file with comments | « source/test/cintltst/citertst.h ('k') | source/test/cintltst/cldrtest.c » ('j') | no next file with comments »