| Index: source/test/cintltst/cmsccoll.c
|
| diff --git a/source/test/cintltst/cmsccoll.c b/source/test/cintltst/cmsccoll.c
|
| index b47472806b246bc3c294b6ed7f7dbe2014a7577b..d20cd9a93aeed7ccf1123ce532201b071b8d4144 100644
|
| --- a/source/test/cintltst/cmsccoll.c
|
| +++ b/source/test/cintltst/cmsccoll.c
|
| @@ -1,7 +1,7 @@
|
|
|
| /********************************************************************
|
| * COPYRIGHT:
|
| - * Copyright (c) 2001-2013, International Business Machines Corporation and
|
| + * Copyright (c) 2001-2014, International Business Machines Corporation and
|
| * others. All Rights Reserved.
|
| ********************************************************************/
|
| /*******************************************************************************
|
| @@ -29,7 +29,6 @@
|
| #include "unicode/ustring.h"
|
| #include "string.h"
|
| #include "ucol_imp.h"
|
| -#include "ucol_tok.h"
|
| #include "cmemory.h"
|
| #include "cstring.h"
|
| #include "uassert.h"
|
| @@ -413,1117 +412,6 @@ static void BillFairmanTest(void) {
|
| }
|
| }
|
|
|
| -static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
|
| - UChar source[256] = { '\0'};
|
| - UChar target[256] = { '\0'};
|
| - UChar preP = 0x31a3;
|
| - UChar preQ = 0x310d;
|
| -/*
|
| - UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
|
| - UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
|
| -*/
|
| - /*log_verbose("Testing primary\n");*/
|
| -
|
| - doTest(col, p, q, UCOL_LESS);
|
| -/*
|
| - UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
|
| -
|
| - if(result!=UCOL_LESS){
|
| - aescstrdup(p,utfSource,256);
|
| - aescstrdup(q,utfTarget,256);
|
| - fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
|
| - }
|
| -*/
|
| - source[0] = preP;
|
| - u_strcpy(source+1,p);
|
| - target[0] = preQ;
|
| - u_strcpy(target+1,q);
|
| - doTest(col, source, target, UCOL_LESS);
|
| -/*
|
| - fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
|
| -*/
|
| -}
|
| -
|
| -static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
|
| - UChar source[256] = { '\0'};
|
| - UChar target[256] = { '\0'};
|
| -
|
| - /*log_verbose("Testing secondary\n");*/
|
| -
|
| - doTest(col, p, q, UCOL_LESS);
|
| -/*
|
| - fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
|
| -*/
|
| - source[0] = 0x0053;
|
| - u_strcpy(source+1,p);
|
| - target[0]= 0x0073;
|
| - u_strcpy(target+1,q);
|
| -
|
| - doTest(col, source, target, UCOL_LESS);
|
| -/*
|
| - fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
|
| -*/
|
| -
|
| -
|
| - u_strcpy(source,p);
|
| - source[u_strlen(p)] = 0x62;
|
| - source[u_strlen(p)+1] = 0;
|
| -
|
| -
|
| - u_strcpy(target,q);
|
| - target[u_strlen(q)] = 0x61;
|
| - target[u_strlen(q)+1] = 0;
|
| -
|
| - doTest(col, source, target, UCOL_GREATER);
|
| -
|
| -/*
|
| - fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
|
| -*/
|
| -}
|
| -
|
| -static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
|
| - UChar source[256] = { '\0'};
|
| - UChar target[256] = { '\0'};
|
| -
|
| - /*log_verbose("Testing tertiary\n");*/
|
| -
|
| - doTest(col, p, q, UCOL_LESS);
|
| -/*
|
| - fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
|
| -*/
|
| - source[0] = 0x0020;
|
| - u_strcpy(source+1,p);
|
| - target[0]= 0x002D;
|
| - u_strcpy(target+1,q);
|
| -
|
| - doTest(col, source, target, UCOL_LESS);
|
| -/*
|
| - fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
|
| -*/
|
| -
|
| - u_strcpy(source,p);
|
| - source[u_strlen(p)] = 0xE0;
|
| - source[u_strlen(p)+1] = 0;
|
| -
|
| - u_strcpy(target,q);
|
| - target[u_strlen(q)] = 0x61;
|
| - target[u_strlen(q)+1] = 0;
|
| -
|
| - doTest(col, source, target, UCOL_GREATER);
|
| -
|
| -/*
|
| - fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
|
| -*/
|
| -}
|
| -
|
| -static void testEquality(UCollator* col, const UChar* p,const UChar* q){
|
| -/*
|
| - UChar source[256] = { '\0'};
|
| - UChar target[256] = { '\0'};
|
| -*/
|
| -
|
| - doTest(col, p, q, UCOL_EQUAL);
|
| -/*
|
| - fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
|
| -*/
|
| -}
|
| -
|
| -static void testCollator(UCollator *coll, UErrorCode *status) {
|
| - const UChar *rules = NULL, *current = NULL;
|
| - int32_t ruleLen = 0;
|
| - uint32_t strength = 0;
|
| - uint32_t chOffset = 0; uint32_t chLen = 0;
|
| - uint32_t exOffset = 0; uint32_t exLen = 0;
|
| - uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
|
| - uint32_t firstEx = 0;
|
| -/* uint32_t rExpsLen = 0; */
|
| - uint32_t firstLen = 0;
|
| - UBool varT = FALSE; UBool top_ = TRUE;
|
| - uint16_t specs = 0;
|
| - UBool startOfRules = TRUE;
|
| - UBool lastReset = FALSE;
|
| - UBool before = FALSE;
|
| - uint32_t beforeStrength = 0;
|
| - UColTokenParser src;
|
| - UColOptionSet opts;
|
| -
|
| - UChar first[256];
|
| - UChar second[256];
|
| - UChar tempB[256];
|
| - uint32_t tempLen;
|
| - UChar *rulesCopy = NULL;
|
| - UParseError parseError;
|
| -
|
| - uprv_memset(&src, 0, sizeof(UColTokenParser));
|
| -
|
| - src.opts = &opts;
|
| -
|
| - rules = ucol_getRules(coll, &ruleLen);
|
| - if(U_SUCCESS(*status) && ruleLen > 0) {
|
| - rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
|
| - uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
|
| - src.current = src.source = rulesCopy;
|
| - src.end = rulesCopy+ruleLen;
|
| - src.extraCurrent = src.end;
|
| - src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
| - *first = *second = 0;
|
| -
|
| - /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
|
| - the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
|
| - while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
|
| - strength = src.parsedToken.strength;
|
| - chOffset = src.parsedToken.charsOffset;
|
| - chLen = src.parsedToken.charsLen;
|
| - exOffset = src.parsedToken.extensionOffset;
|
| - exLen = src.parsedToken.extensionLen;
|
| - prefixOffset = src.parsedToken.prefixOffset;
|
| - prefixLen = src.parsedToken.prefixLen;
|
| - specs = src.parsedToken.flags;
|
| -
|
| - startOfRules = FALSE;
|
| - varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
|
| - (void)varT; /* Suppress set but not used warning. */
|
| - top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
|
| - if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
|
| - second[0] = 0;
|
| - } else {
|
| - u_strncpy(second,src.source+chOffset, chLen);
|
| - second[chLen] = 0;
|
| -
|
| - if(exLen > 0 && firstEx == 0) {
|
| - u_strncat(first, src.source+exOffset, exLen);
|
| - first[firstLen+exLen] = 0;
|
| - }
|
| -
|
| - if(lastReset == TRUE && prefixLen != 0) {
|
| - u_strncpy(first+prefixLen, first, firstLen);
|
| - u_strncpy(first, src.source+prefixOffset, prefixLen);
|
| - first[firstLen+prefixLen] = 0;
|
| - firstLen = firstLen+prefixLen;
|
| - }
|
| -
|
| - if(before == TRUE) { /* swap first and second */
|
| - u_strcpy(tempB, first);
|
| - u_strcpy(first, second);
|
| - u_strcpy(second, tempB);
|
| -
|
| - tempLen = firstLen;
|
| - firstLen = chLen;
|
| - chLen = tempLen;
|
| -
|
| - tempLen = firstEx;
|
| - firstEx = exLen;
|
| - exLen = tempLen;
|
| - if(beforeStrength < strength) {
|
| - strength = beforeStrength;
|
| - }
|
| - }
|
| - }
|
| - lastReset = FALSE;
|
| -
|
| - switch(strength){
|
| - case UCOL_IDENTICAL:
|
| - testEquality(coll,first,second);
|
| - break;
|
| - case UCOL_PRIMARY:
|
| - testPrimary(coll,first,second);
|
| - break;
|
| - case UCOL_SECONDARY:
|
| - testSecondary(coll,first,second);
|
| - break;
|
| - case UCOL_TERTIARY:
|
| - testTertiary(coll,first,second);
|
| - break;
|
| - case UCOL_TOK_RESET:
|
| - lastReset = TRUE;
|
| - before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
|
| - if(before) {
|
| - beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
|
| - }
|
| - break;
|
| - default:
|
| - break;
|
| - }
|
| -
|
| - if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
|
| - before = FALSE;
|
| - } else {
|
| - firstLen = chLen;
|
| - firstEx = exLen;
|
| - u_strcpy(first, second);
|
| - }
|
| - }
|
| - uprv_free(src.source);
|
| - uprv_free(src.reorderCodes);
|
| - }
|
| -}
|
| -
|
| -static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
|
| - UCollator *UCA = (UCollator *)collator;
|
| - return ucol_strcoll(UCA, source, sLen, target, tLen);
|
| -}
|
| -
|
| -/*
|
| -static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
|
| -#if U_PLATFORM_HAS_WIN32_API
|
| - LCID lcid = (LCID)collator;
|
| - return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
|
| -#else
|
| - return 0;
|
| -#endif
|
| -}
|
| -*/
|
| -
|
| -static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
|
| - UChar s1, UChar s2,
|
| - const UChar *s, const uint32_t sLen,
|
| - const UChar *t, const uint32_t tLen) {
|
| - UChar source[256] = {0};
|
| - UChar target[256] = {0};
|
| -
|
| - source[0] = s1;
|
| - u_strcpy(source+1, s);
|
| - target[0] = s2;
|
| - u_strcpy(target+1, t);
|
| -
|
| - return func(collator, opts, source, sLen+1, target, tLen+1);
|
| -}
|
| -
|
| -static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
|
| - UChar s1, UChar s2,
|
| - const UChar *s, const uint32_t sLen,
|
| - const UChar *t, const uint32_t tLen) {
|
| - UChar source[256] = {0};
|
| - UChar target[256] = {0};
|
| -
|
| - u_strcpy(source, s);
|
| - source[sLen] = s1;
|
| - u_strcpy(target, t);
|
| - target[tLen] = s2;
|
| -
|
| - return func(collator, opts, source, sLen+1, target, tLen+1);
|
| -}
|
| -
|
| -static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
|
| - const UChar *s, const uint32_t sLen,
|
| - const UChar *t, const uint32_t tLen,
|
| - UCollationResult result) {
|
| - /*UChar fPrimary = 0x6d;*/
|
| - /*UChar sPrimary = 0x6e;*/
|
| - UChar fSecondary = 0x310d;
|
| - UChar sSecondary = 0x31a3;
|
| - UChar fTertiary = 0x310f;
|
| - UChar sTertiary = 0x31b7;
|
| -
|
| - UCollationResult oposite;
|
| - if(result == UCOL_EQUAL) {
|
| - return UCOL_IDENTICAL;
|
| - } else if(result == UCOL_GREATER) {
|
| - oposite = UCOL_LESS;
|
| - } else {
|
| - oposite = UCOL_GREATER;
|
| - }
|
| -
|
| - if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
|
| - return UCOL_PRIMARY;
|
| - } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
|
| - (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
|
| - return UCOL_SECONDARY;
|
| - } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
|
| - (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
|
| - return UCOL_TERTIARY;
|
| - } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
|
| - (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
|
| - return UCOL_QUATERNARY;
|
| - } else {
|
| - return UCOL_IDENTICAL;
|
| - }
|
| -}
|
| -
|
| -static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
|
| - uint32_t i = 0;
|
| -
|
| - if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
|
| - buffer[0] = '=';
|
| - buffer[1] = '=';
|
| - buffer[2] = '\0';
|
| - } else if(res == UCOL_GREATER) {
|
| - for(i = 0; i<strength+1; i++) {
|
| - buffer[i] = '>';
|
| - }
|
| - buffer[strength+1] = '\0';
|
| - } else {
|
| - for(i = 0; i<strength+1; i++) {
|
| - buffer[i] = '<';
|
| - }
|
| - buffer[strength+1] = '\0';
|
| - }
|
| -
|
| - return buffer;
|
| -}
|
| -
|
| -
|
| -
|
| -static void logFailure (const char *platform, const char *test,
|
| - const UChar *source, const uint32_t sLen,
|
| - const UChar *target, const uint32_t tLen,
|
| - UCollationResult realRes, uint32_t realStrength,
|
| - UCollationResult expRes, uint32_t expStrength, UBool error) {
|
| -
|
| - uint32_t i = 0;
|
| -
|
| - char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
|
| - static int32_t maxOutputLength = 0;
|
| - int32_t outputLength;
|
| -
|
| - *sEsc = *tEsc = *s = *t = 0;
|
| - if(error == TRUE) {
|
| - log_err("Difference between expected and generated order. Run test with -v for more info\n");
|
| - } else if(getTestOption(VERBOSITY_OPTION) == 0) {
|
| - return;
|
| - }
|
| - for(i = 0; i<sLen; i++) {
|
| - sprintf(b, "%04X", source[i]);
|
| - strcat(sEsc, "\\u");
|
| - strcat(sEsc, b);
|
| - strcat(s, b);
|
| - strcat(s, " ");
|
| - if(source[i] < 0x80) {
|
| - sprintf(b, "(%c)", source[i]);
|
| - strcat(sEsc, b);
|
| - }
|
| - }
|
| - for(i = 0; i<tLen; i++) {
|
| - sprintf(b, "%04X", target[i]);
|
| - strcat(tEsc, "\\u");
|
| - strcat(tEsc, b);
|
| - strcat(t, b);
|
| - strcat(t, " ");
|
| - if(target[i] < 0x80) {
|
| - sprintf(b, "(%c)", target[i]);
|
| - strcat(tEsc, b);
|
| - }
|
| - }
|
| -/*
|
| - strcpy(output, "[[ ");
|
| - strcat(output, sEsc);
|
| - strcat(output, getRelationSymbol(expRes, expStrength, relation));
|
| - strcat(output, tEsc);
|
| -
|
| - strcat(output, " : ");
|
| -
|
| - strcat(output, sEsc);
|
| - strcat(output, getRelationSymbol(realRes, realStrength, relation));
|
| - strcat(output, tEsc);
|
| - strcat(output, " ]] ");
|
| -
|
| - log_verbose("%s", output);
|
| -*/
|
| -
|
| -
|
| - strcpy(output, "DIFF: ");
|
| -
|
| - strcat(output, s);
|
| - strcat(output, " : ");
|
| - strcat(output, t);
|
| -
|
| - strcat(output, test);
|
| - strcat(output, ": ");
|
| -
|
| - strcat(output, sEsc);
|
| - strcat(output, getRelationSymbol(expRes, expStrength, relation));
|
| - strcat(output, tEsc);
|
| -
|
| - strcat(output, " ");
|
| -
|
| - strcat(output, platform);
|
| - strcat(output, ": ");
|
| -
|
| - strcat(output, sEsc);
|
| - strcat(output, getRelationSymbol(realRes, realStrength, relation));
|
| - strcat(output, tEsc);
|
| -
|
| - outputLength = (int32_t)strlen(output);
|
| - if(outputLength > maxOutputLength) {
|
| - maxOutputLength = outputLength;
|
| - U_ASSERT(outputLength < sizeof(output));
|
| - }
|
| -
|
| - log_verbose("%s\n", output);
|
| -
|
| -}
|
| -
|
| -/*
|
| -static void printOutRules(const UChar *rules) {
|
| - uint32_t len = u_strlen(rules);
|
| - uint32_t i = 0;
|
| - char toPrint;
|
| - uint32_t line = 0;
|
| -
|
| - fprintf(stdout, "Rules:");
|
| -
|
| - for(i = 0; i<len; i++) {
|
| - if(rules[i]<0x7f && rules[i]>=0x20) {
|
| - toPrint = (char)rules[i];
|
| - if(toPrint == '&') {
|
| - line = 1;
|
| - fprintf(stdout, "\n&");
|
| - } else if(toPrint == ';') {
|
| - fprintf(stdout, "<<");
|
| - line+=2;
|
| - } else if(toPrint == ',') {
|
| - fprintf(stdout, "<<<");
|
| - line+=3;
|
| - } else {
|
| - fprintf(stdout, "%c", toPrint);
|
| - line++;
|
| - }
|
| - } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
|
| - fprintf(stdout, "\\u%04X", rules[i]);
|
| - line+=6;
|
| - }
|
| - if(line>72) {
|
| - fprintf(stdout, "\n");
|
| - line = 0;
|
| - }
|
| - }
|
| -
|
| - log_verbose("\n");
|
| -
|
| -}
|
| -*/
|
| -
|
| -static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
|
| - uint32_t diffs = 0;
|
| - UCollationResult realResult;
|
| - uint32_t realStrength;
|
| -
|
| - uint32_t sLen = u_strlen(first);
|
| - uint32_t tLen = u_strlen(second);
|
| -
|
| - realResult = func(collator, opts, first, sLen, second, tLen);
|
| - realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
|
| -
|
| - if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
|
| - logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
|
| - diffs++;
|
| - } else if(realResult != UCOL_LESS || realStrength != strength) {
|
| - logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
|
| - diffs++;
|
| - }
|
| - return diffs;
|
| -}
|
| -
|
| -
|
| -static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
|
| - const UChar *rules = NULL, *current = NULL;
|
| - int32_t ruleLen = 0;
|
| - uint32_t strength = 0;
|
| - uint32_t chOffset = 0; uint32_t chLen = 0;
|
| - uint32_t exOffset = 0; uint32_t exLen = 0;
|
| - uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
|
| -/* uint32_t rExpsLen = 0; */
|
| - uint32_t firstLen = 0, secondLen = 0;
|
| - UBool varT = FALSE; UBool top_ = TRUE;
|
| - uint16_t specs = 0;
|
| - UBool startOfRules = TRUE;
|
| - UColTokenParser src;
|
| - UColOptionSet opts;
|
| -
|
| - UChar first[256];
|
| - UChar second[256];
|
| - UChar *rulesCopy = NULL;
|
| -
|
| - uint32_t UCAdiff = 0;
|
| - uint32_t Windiff = 1;
|
| - UParseError parseError;
|
| -
|
| - (void)top_; /* Suppress set but not used warnings. */
|
| - (void)varT;
|
| - (void)secondLen;
|
| - (void)prefixLen;
|
| - (void)prefixOffset;
|
| -
|
| - uprv_memset(&src, 0, sizeof(UColTokenParser));
|
| - src.opts = &opts;
|
| -
|
| - rules = ucol_getRules(coll, &ruleLen);
|
| -
|
| - /*printOutRules(rules);*/
|
| -
|
| - if(U_SUCCESS(*status) && ruleLen > 0) {
|
| - rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
|
| - uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
|
| - src.current = src.source = rulesCopy;
|
| - src.end = rulesCopy+ruleLen;
|
| - src.extraCurrent = src.end;
|
| - src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
| - *first = *second = 0;
|
| -
|
| - /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
|
| - the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
|
| - while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
|
| - strength = src.parsedToken.strength;
|
| - chOffset = src.parsedToken.charsOffset;
|
| - chLen = src.parsedToken.charsLen;
|
| - exOffset = src.parsedToken.extensionOffset;
|
| - exLen = src.parsedToken.extensionLen;
|
| - prefixOffset = src.parsedToken.prefixOffset;
|
| - prefixLen = src.parsedToken.prefixLen;
|
| - specs = src.parsedToken.flags;
|
| -
|
| - startOfRules = FALSE;
|
| - varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
|
| - top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
|
| -
|
| - u_strncpy(second,src.source+chOffset, chLen);
|
| - second[chLen] = 0;
|
| - secondLen = chLen;
|
| -
|
| - if(exLen > 0) {
|
| - u_strncat(first, src.source+exOffset, exLen);
|
| - first[firstLen+exLen] = 0;
|
| - firstLen += exLen;
|
| - }
|
| -
|
| - if(strength != UCOL_TOK_RESET) {
|
| - if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
|
| - UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
|
| - /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
|
| - }
|
| - }
|
| -
|
| -
|
| - firstLen = chLen;
|
| - u_strcpy(first, second);
|
| -
|
| - }
|
| - if(UCAdiff != 0 && Windiff != 0) {
|
| - log_verbose("\n");
|
| - }
|
| - if(UCAdiff == 0) {
|
| - log_verbose("No immediate difference with %s!\n", refName);
|
| - }
|
| - if(Windiff == 0) {
|
| - log_verbose("No immediate difference with Win32!\n");
|
| - }
|
| - uprv_free(src.source);
|
| - uprv_free(src.reorderCodes);
|
| - }
|
| -}
|
| -
|
| -/*
|
| - * Takes two CEs (lead and continuation) and
|
| - * compares them as CEs should be compared:
|
| - * primary vs. primary, secondary vs. secondary
|
| - * tertiary vs. tertiary
|
| - */
|
| -static int32_t compareCEs(uint32_t s1, uint32_t s2,
|
| - uint32_t t1, uint32_t t2) {
|
| - uint32_t s = 0, t = 0;
|
| - if(s1 == t1 && s2 == t2) {
|
| - return 0;
|
| - }
|
| - s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
|
| - t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
|
| - if(s < t) {
|
| - return -1;
|
| - } else if(s > t) {
|
| - return 1;
|
| - } else {
|
| - s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
|
| - t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
|
| - if(s < t) {
|
| - return -1;
|
| - } else if(s > t) {
|
| - return 1;
|
| - } else {
|
| - s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
|
| - t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
|
| - if(s < t) {
|
| - return -1;
|
| - } else {
|
| - return 1;
|
| - }
|
| - }
|
| - }
|
| -}
|
| -
|
| -typedef struct {
|
| - uint32_t startCE;
|
| - uint32_t startContCE;
|
| - uint32_t limitCE;
|
| - uint32_t limitContCE;
|
| -} indirectBoundaries;
|
| -
|
| -/* these values are used for finding CE values for indirect positioning. */
|
| -/* Indirect positioning is a mechanism for allowing resets on symbolic */
|
| -/* values. It only works for resets and you cannot tailor indirect names */
|
| -/* An indirect name can define either an anchor point or a range. An */
|
| -/* anchor point behaves in exactly the same way as a code point in reset */
|
| -/* would, except that it cannot be tailored. A range (we currently only */
|
| -/* know for the [top] range will explicitly set the upper bound for */
|
| -/* generated CEs, thus allowing for better control over how many CEs can */
|
| -/* be squeezed between in the range without performance penalty. */
|
| -/* In that respect, we use [top] for tailoring of locales that use CJK */
|
| -/* characters. Other indirect values are currently a pure convenience, */
|
| -/* they can be used to assure that the CEs will be always positioned in */
|
| -/* the same place relative to a point with known properties (e.g. first */
|
| -/* primary ignorable). */
|
| -static indirectBoundaries ucolIndirectBoundaries[15];
|
| -static UBool indirectBoundariesSet = FALSE;
|
| -static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
|
| - /* Set values for the top - TODO: once we have values for all the indirects, we are going */
|
| - /* to initalize here. */
|
| - ucolIndirectBoundaries[indexR].startCE = start[0];
|
| - ucolIndirectBoundaries[indexR].startContCE = start[1];
|
| - if(end) {
|
| - ucolIndirectBoundaries[indexR].limitCE = end[0];
|
| - ucolIndirectBoundaries[indexR].limitContCE = end[1];
|
| - } else {
|
| - ucolIndirectBoundaries[indexR].limitCE = 0;
|
| - ucolIndirectBoundaries[indexR].limitContCE = 0;
|
| - }
|
| -}
|
| -
|
| -static void testCEs(UCollator *coll, UErrorCode *status) {
|
| - const UChar *rules = NULL, *current = NULL;
|
| - int32_t ruleLen = 0;
|
| -
|
| - uint32_t strength = 0;
|
| - uint32_t maxStrength = UCOL_IDENTICAL;
|
| - uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
|
| - uint32_t lastCE;
|
| - uint32_t lastContCE;
|
| -
|
| - int32_t result = 0;
|
| - uint32_t chOffset = 0; uint32_t chLen = 0;
|
| - uint32_t exOffset = 0; uint32_t exLen = 0;
|
| - uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
|
| - uint32_t oldOffset = 0;
|
| -
|
| - /* uint32_t rExpsLen = 0; */
|
| - /* uint32_t firstLen = 0; */
|
| - uint16_t specs = 0;
|
| - UBool varT = FALSE; UBool top_ = TRUE;
|
| - UBool startOfRules = TRUE;
|
| - UBool before = FALSE;
|
| - UColTokenParser src;
|
| - UColOptionSet opts;
|
| - UParseError parseError;
|
| - UChar *rulesCopy = NULL;
|
| - collIterate *c = uprv_new_collIterate(status);
|
| - UCAConstants *consts = NULL;
|
| - uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
|
| - UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
|
| - const char *colLoc;
|
| - UCollator *UCA = ucol_open("root", status);
|
| -
|
| - (void)varT; /* Suppress set but not used warnings. */
|
| - (void)prefixLen;
|
| - (void)prefixOffset;
|
| - (void)exLen;
|
| - (void)exOffset;
|
| -
|
| - if (U_FAILURE(*status)) {
|
| - log_err("Could not open root collator %s\n", u_errorName(*status));
|
| - uprv_delete_collIterate(c);
|
| - return;
|
| - }
|
| -
|
| - colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
|
| - if (U_FAILURE(*status)) {
|
| - log_err("Could not get collator name: %s\n", u_errorName(*status));
|
| - ucol_close(UCA);
|
| - uprv_delete_collIterate(c);
|
| - return;
|
| - }
|
| -
|
| - uprv_memset(&src, 0, sizeof(UColTokenParser));
|
| -
|
| - consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
|
| - UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
|
| - /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
|
| - UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
|
| - UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
|
| -
|
| - baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
|
| -
|
| - src.opts = &opts;
|
| -
|
| - rules = ucol_getRules(coll, &ruleLen);
|
| -
|
| - src.invUCA = ucol_initInverseUCA(status);
|
| -
|
| - if(indirectBoundariesSet == FALSE) {
|
| - /* UCOL_RESET_TOP_VALUE */
|
| - setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
|
| - /* UCOL_FIRST_PRIMARY_IGNORABLE */
|
| - setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
|
| - /* UCOL_LAST_PRIMARY_IGNORABLE */
|
| - setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
|
| - /* UCOL_FIRST_SECONDARY_IGNORABLE */
|
| - setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
|
| - /* UCOL_LAST_SECONDARY_IGNORABLE */
|
| - setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
|
| - /* UCOL_FIRST_TERTIARY_IGNORABLE */
|
| - setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
|
| - /* UCOL_LAST_TERTIARY_IGNORABLE */
|
| - setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
|
| - /* UCOL_FIRST_VARIABLE */
|
| - setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
|
| - /* UCOL_LAST_VARIABLE */
|
| - setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
|
| - /* UCOL_FIRST_NON_VARIABLE */
|
| - setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
|
| - /* UCOL_LAST_NON_VARIABLE */
|
| - setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
|
| - /* UCOL_FIRST_IMPLICIT */
|
| - setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
|
| - /* UCOL_LAST_IMPLICIT */
|
| - setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
|
| - /* UCOL_FIRST_TRAILING */
|
| - setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
|
| - /* UCOL_LAST_TRAILING */
|
| - setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
|
| - ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
|
| - indirectBoundariesSet = TRUE;
|
| - }
|
| -
|
| -
|
| - if(U_SUCCESS(*status) && ruleLen > 0) {
|
| - rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
|
| - uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
|
| - src.current = src.source = rulesCopy;
|
| - src.end = rulesCopy+ruleLen;
|
| - src.extraCurrent = src.end;
|
| - src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
| -
|
| - /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
|
| - the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
|
| - while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
|
| - strength = src.parsedToken.strength;
|
| - chOffset = src.parsedToken.charsOffset;
|
| - chLen = src.parsedToken.charsLen;
|
| - exOffset = src.parsedToken.extensionOffset;
|
| - exLen = src.parsedToken.extensionLen;
|
| - prefixOffset = src.parsedToken.prefixOffset;
|
| - prefixLen = src.parsedToken.prefixLen;
|
| - specs = src.parsedToken.flags;
|
| -
|
| - startOfRules = FALSE;
|
| - varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
|
| - top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
|
| -
|
| - uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
|
| -
|
| - currCE = ucol_getNextCE(coll, c, status);
|
| - if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
|
| - log_verbose("Thai prevowel detected. Will pick next CE\n");
|
| - currCE = ucol_getNextCE(coll, c, status);
|
| - }
|
| -
|
| - currContCE = ucol_getNextCE(coll, c, status);
|
| - if(!isContinuation(currContCE)) {
|
| - currContCE = 0;
|
| - }
|
| -
|
| - /* we need to repack CEs here */
|
| -
|
| - if(strength == UCOL_TOK_RESET) {
|
| - before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
|
| - if(top_ == TRUE) {
|
| - int32_t tokenIndex = src.parsedToken.indirectIndex;
|
| -
|
| - nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
|
| - nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
|
| - } else {
|
| - nextCE = baseCE = currCE;
|
| - nextContCE = baseContCE = currContCE;
|
| - }
|
| - maxStrength = UCOL_IDENTICAL;
|
| - } else {
|
| - if(strength < maxStrength) {
|
| - maxStrength = strength;
|
| - if(baseCE == UCOL_RESET_TOP_VALUE) {
|
| - log_verbose("Resetting to [top]\n");
|
| - nextCE = UCOL_NEXT_TOP_VALUE;
|
| - nextContCE = UCOL_NEXT_TOP_CONT;
|
| - } else {
|
| - result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
|
| - }
|
| - if(result < 0) {
|
| - if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
|
| - log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
|
| - return;
|
| - } else {
|
| - log_err("%s: couldn't find the CE\n", colLoc);
|
| - return;
|
| - }
|
| - }
|
| - }
|
| -
|
| - currCE &= 0xFFFFFF3F;
|
| - currContCE &= 0xFFFFFFBF;
|
| -
|
| - if(maxStrength == UCOL_IDENTICAL) {
|
| - if(baseCE != currCE || baseContCE != currContCE) {
|
| - log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc);
|
| - }
|
| - } else {
|
| - if(strength == UCOL_IDENTICAL) {
|
| - if(lastCE != currCE || lastContCE != currContCE) {
|
| - log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc);
|
| - }
|
| - } else {
|
| - if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
|
| - /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
|
| - log_err("%s: current CE is not less than base CE\n", colLoc);
|
| - }
|
| - if(!before) {
|
| - if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
|
| - /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
|
| - log_err("%s: sequence of generated CEs is broken\n", colLoc);
|
| - }
|
| - } else {
|
| - before = FALSE;
|
| - if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
|
| - /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
|
| - log_err("%s: sequence of generated CEs is broken\n", colLoc);
|
| - }
|
| - }
|
| - }
|
| - }
|
| -
|
| - }
|
| -
|
| - oldOffset = chOffset;
|
| - lastCE = currCE & 0xFFFFFF3F;
|
| - lastContCE = currContCE & 0xFFFFFFBF;
|
| - }
|
| - uprv_free(src.source);
|
| - uprv_free(src.reorderCodes);
|
| - }
|
| - ucol_close(UCA);
|
| - uprv_delete_collIterate(c);
|
| -}
|
| -
|
| -#if 0
|
| -/* these locales are now picked from index RB */
|
| -static const char* localesToTest[] = {
|
| -"ar", "bg", "ca", "cs", "da",
|
| -"el", "en_BE", "en_US_POSIX",
|
| -"es", "et", "fi", "fr", "hi",
|
| -"hr", "hu", "is", "iw", "ja",
|
| -"ko", "lt", "lv", "mk", "mt",
|
| -"nb", "nn", "nn_NO", "pl", "ro",
|
| -"ru", "sh", "sk", "sl", "sq",
|
| -"sr", "sv", "th", "tr", "uk",
|
| -"vi", "zh", "zh_TW"
|
| -};
|
| -#endif
|
| -
|
| -static const char* rulesToTest[] = {
|
| - /* Funky fa rule */
|
| - "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
|
| - /*"& Z < p, P",*/
|
| - /* Cui Mins rules */
|
| - "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
|
| - "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
|
| - "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
|
| - "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
|
| - "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
|
| - "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
|
| - "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
|
| -};
|
| -
|
| -
|
| -static void TestCollations(void) {
|
| - int32_t noOfLoc = uloc_countAvailable();
|
| - int32_t i = 0, j = 0;
|
| -
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - char cName[256];
|
| - UChar name[256];
|
| - int32_t nameSize;
|
| -
|
| -
|
| - const char *locName = NULL;
|
| - UCollator *coll = NULL;
|
| - UCollator *UCA = ucol_open("", &status);
|
| - UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
|
| - if (U_FAILURE(status)) {
|
| - log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
|
| - return;
|
| - }
|
| - ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
|
| -
|
| - for(i = 0; i<noOfLoc; i++) {
|
| - status = U_ZERO_ERROR;
|
| - locName = uloc_getAvailable(i);
|
| - if(uprv_strcmp("ja", locName) == 0) {
|
| - log_verbose("Don't know how to test prefixes\n");
|
| - continue;
|
| - }
|
| - if(hasCollationElements(locName)) {
|
| - nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
|
| - for(j = 0; j<nameSize; j++) {
|
| - cName[j] = (char)name[j];
|
| - }
|
| - cName[nameSize] = 0;
|
| - log_verbose("\nTesting locale %s (%s)\n", locName, cName);
|
| - coll = ucol_open(locName, &status);
|
| - if(U_SUCCESS(status)) {
|
| - testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
|
| - ucol_close(coll);
|
| - } else {
|
| - log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
|
| - status = U_ZERO_ERROR;
|
| - }
|
| - }
|
| - }
|
| - ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
|
| - ucol_close(UCA);
|
| -}
|
| -
|
| -static void RamsRulesTest(void) {
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - int32_t i = 0;
|
| - UCollator *coll = NULL;
|
| - UChar rule[2048];
|
| - uint32_t ruleLen;
|
| - int32_t noOfLoc = uloc_countAvailable();
|
| - const char *locName = NULL;
|
| -
|
| - log_verbose("RamsRulesTest\n");
|
| -
|
| - if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
|
| - /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
|
| - return;
|
| - }
|
| -
|
| - for(i = 0; i<noOfLoc; i++) {
|
| - locName = uloc_getAvailable(i);
|
| - if(hasCollationElements(locName)) {
|
| - if (uprv_strcmp("ja", locName)==0) {
|
| - log_verbose("Don't know how to test Japanese because of prefixes\n");
|
| - continue;
|
| - }
|
| - if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
|
| - log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
|
| - continue;
|
| - }
|
| - if (uprv_strcmp("bn", locName)==0 ||
|
| - uprv_strcmp("bs", locName)==0 || /* Add due to import per cldrbug 5647 */
|
| - uprv_strcmp("bs_Cyrl", locName)==0 || /* Add due to import per cldrbug 5647 */
|
| - uprv_strcmp("en_US_POSIX", locName)==0 ||
|
| - uprv_strcmp("fa", locName)==0 || /* Add in #10222 with CLDR 24 integration */
|
| - uprv_strcmp("fa_AF", locName)==0 || /* Add due to import per cldrbug 5647 */
|
| - uprv_strcmp("gl", locName)==0 || /* Add due to import per cldrbug 5647 */
|
| - uprv_strcmp("gl_ES", locName)==0 || /* Add due to import per cldrbug 5647 */
|
| - uprv_strcmp("he", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
|
| - uprv_strcmp("he_IL", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
|
| - uprv_strcmp("km", locName)==0 ||
|
| - uprv_strcmp("km_KH", locName)==0 ||
|
| - uprv_strcmp("my", locName)==0 ||
|
| - uprv_strcmp("ps", locName)==0 || /* Add in #10222 with CLDR 24 integration */
|
| - uprv_strcmp("si", locName)==0 ||
|
| - uprv_strcmp("si_LK", locName)==0 ||
|
| - uprv_strcmp("sr_Latn", locName)==0 || /* Add due to import per cldrbug 5647 */
|
| - uprv_strcmp("th", locName)==0 ||
|
| - uprv_strcmp("th_TH", locName)==0 ||
|
| - uprv_strcmp("zh", locName)==0 ||
|
| - uprv_strcmp("zh_Hant", locName)==0
|
| - ) {
|
| - if(log_knownIssue("6040", NULL)) {
|
| - log_verbose("Can't test %s - TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
|
| - continue;
|
| - }
|
| - }
|
| - log_verbose("Testing locale %s\n", locName);
|
| - status = U_ZERO_ERROR;
|
| - coll = ucol_open(locName, &status);
|
| - if(U_SUCCESS(status)) {
|
| - if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
|
| - if(coll->image->jamoSpecial == TRUE) {
|
| - log_err("%s has special JAMOs\n", locName);
|
| - }
|
| - ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
|
| - testCollator(coll, &status);
|
| - testCEs(coll, &status);
|
| - } else {
|
| - log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
|
| - }
|
| - ucol_close(coll);
|
| - } else {
|
| - log_err("Could not open %s: %s\n", locName, u_errorName(status));
|
| - }
|
| - }
|
| - }
|
| -
|
| - for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
|
| - log_verbose("Testing rule: %s\n", rulesToTest[i]);
|
| - ruleLen = u_unescape(rulesToTest[i], rule, 2048);
|
| - status = U_ZERO_ERROR;
|
| - coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
|
| - if(U_SUCCESS(status)) {
|
| - testCollator(coll, &status);
|
| - testCEs(coll, &status);
|
| - ucol_close(coll);
|
| - } else {
|
| - log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
|
| - }
|
| - }
|
| -
|
| -}
|
| -
|
| -static void IsTailoredTest(void) {
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - uint32_t i = 0;
|
| - UCollator *coll = NULL;
|
| - UChar rule[2048];
|
| - UChar tailored[2048];
|
| - UChar notTailored[2048];
|
| - uint32_t ruleLen, tailoredLen, notTailoredLen;
|
| -
|
| - log_verbose("IsTailoredTest\n");
|
| -
|
| - u_uastrcpy(rule, "&Z < A, B, C;c < d");
|
| - ruleLen = u_strlen(rule);
|
| -
|
| - u_uastrcpy(tailored, "ABCcd");
|
| - tailoredLen = u_strlen(tailored);
|
| -
|
| - u_uastrcpy(notTailored, "ZabD");
|
| - notTailoredLen = u_strlen(notTailored);
|
| -
|
| - coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
|
| - if(U_SUCCESS(status)) {
|
| - for(i = 0; i<tailoredLen; i++) {
|
| - if(!ucol_isTailored(coll, tailored[i], &status)) {
|
| - log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
|
| - }
|
| - }
|
| - for(i = 0; i<notTailoredLen; i++) {
|
| - if(ucol_isTailored(coll, notTailored[i], &status)) {
|
| - log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
|
| - }
|
| - }
|
| - ucol_close(coll);
|
| - }
|
| - else {
|
| - log_err_status(status, "Can't tailor rules\n");
|
| - }
|
| - /* Code coverage */
|
| - status = U_ZERO_ERROR;
|
| - coll = ucol_open("ja", &status);
|
| - if(!ucol_isTailored(coll, 0x4E9C, &status)) {
|
| - log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
|
| - }
|
| - ucol_close(coll);
|
| -}
|
| -
|
| -
|
| const static char chTest[][20] = {
|
| "c",
|
| "C",
|
| @@ -1573,6 +461,7 @@ static void TestChMove(void) {
|
|
|
|
|
|
|
| +/*
|
| const static char impTest[][20] = {
|
| "\\u4e00",
|
| "a",
|
| @@ -1581,6 +470,7 @@ const static char impTest[][20] = {
|
| "B",
|
| "\\u4e01"
|
| };
|
| +*/
|
|
|
|
|
| static void TestImplicitTailoring(void) {
|
| @@ -1589,7 +479,12 @@ static void TestImplicitTailoring(void) {
|
| const char *data[10];
|
| const uint32_t len;
|
| } tests[] = {
|
| - { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
|
| + {
|
| + /* Tailor b and c before U+4E00. */
|
| + "&[before 1]\\u4e00 < b < c "
|
| + /* Now, before U+4E00 is c; put d and e after that. */
|
| + "&[before 1]\\u4e00 < d < e",
|
| + { "b", "c", "d", "e", "\\u4e00"}, 5 },
|
| { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
|
| { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
|
| { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
|
| @@ -1934,205 +829,6 @@ static void TestJ815(void) {
|
| }
|
|
|
|
|
| -/*
|
| -"& a < b < c < d& r < c", "& a < b < d& r < c",
|
| -"& a < b < c < d& c < m", "& a < b < c < m < d",
|
| -"& a < b < c < d& a < m", "& a < m < b < c < d",
|
| -"& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
|
| -"& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
|
| -"& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
|
| -"& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
|
| -"& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
|
| -"& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
|
| -*/
|
| -static void TestRedundantRules(void) {
|
| - int32_t i;
|
| -
|
| - static const struct {
|
| - const char *rules;
|
| - const char *expectedRules;
|
| - const char *testdata[8];
|
| - uint32_t testdatalen;
|
| - } tests[] = {
|
| - /* this test conflicts with positioning of CODAN placeholder */
|
| - /*{
|
| - "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
|
| - "&\\u2089<<<x",
|
| - {"\\u2089", "x"}, 2
|
| - }, */
|
| - /* this test conflicts with the [before x] syntax tightening */
|
| - /*{
|
| - "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
|
| - "&\\u0252<<<x",
|
| - {"\\u0252", "x"}, 2
|
| - }, */
|
| - /* this test conflicts with the [before x] syntax tightening */
|
| - /*{
|
| - "& a < b <<< c << d <<< e& [before 1] e <<< x",
|
| - "& a <<< x < b <<< c << d <<< e",
|
| - {"a", "x", "b", "c", "d", "e"}, 6
|
| - }, */
|
| - {
|
| - "& a < b < c < d& [before 1] c < m",
|
| - "& a < b < m < c < d",
|
| - {"a", "b", "m", "c", "d"}, 5
|
| - },
|
| - {
|
| - "& a < b <<< c << d <<< e& [before 3] e <<< x",
|
| - "& a < b <<< c << d <<< x <<< e",
|
| - {"a", "b", "c", "d", "x", "e"}, 6
|
| - },
|
| - /* this test conflicts with the [before x] syntax tightening */
|
| - /* {
|
| - "& a < b <<< c << d <<< e& [before 2] e <<< x",
|
| - "& a < b <<< c <<< x << d <<< e",
|
| - {"a", "b", "c", "x", "d", "e"},, 6
|
| - }, */
|
| - {
|
| - "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
|
| - "& a < b <<< c << d <<< e <<< f < x < g",
|
| - {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
|
| - },
|
| - {
|
| - "& a <<< b << c < d& a < m",
|
| - "& a <<< b << c < m < d",
|
| - {"a", "b", "c", "m", "d"}, 5
|
| - },
|
| - {
|
| - "&a<b<<b\\u0301 &z<b",
|
| - "&a<b\\u0301 &z<b",
|
| - {"a", "b\\u0301", "z", "b"}, 4
|
| - },
|
| - {
|
| - "&z<m<<<q<<<m",
|
| - "&z<q<<<m",
|
| - {"z", "q", "m"},3
|
| - },
|
| - {
|
| - "&z<<<m<q<<<m",
|
| - "&z<q<<<m",
|
| - {"z", "q", "m"}, 3
|
| - },
|
| - {
|
| - "& a < b < c < d& r < c",
|
| - "& a < b < d& r < c",
|
| - {"a", "b", "d"}, 3
|
| - },
|
| - {
|
| - "& a < b < c < d& r < c",
|
| - "& a < b < d& r < c",
|
| - {"r", "c"}, 2
|
| - },
|
| - {
|
| - "& a < b < c < d& c < m",
|
| - "& a < b < c < m < d",
|
| - {"a", "b", "c", "m", "d"}, 5
|
| - },
|
| - {
|
| - "& a < b < c < d& a < m",
|
| - "& a < m < b < c < d",
|
| - {"a", "m", "b", "c", "d"}, 5
|
| - }
|
| - };
|
| -
|
| -
|
| - UCollator *credundant = NULL;
|
| - UCollator *cresulting = NULL;
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - UChar rlz[2048] = { 0 };
|
| - uint32_t rlen = 0;
|
| -
|
| - for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
|
| - log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
|
| - rlen = u_unescape(tests[i].rules, rlz, 2048);
|
| -
|
| - credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
|
| - if(status == U_FILE_ACCESS_ERROR) {
|
| - log_data_err("Is your data around?\n");
|
| - return;
|
| - } else if(U_FAILURE(status)) {
|
| - log_err("Error opening collator\n");
|
| - return;
|
| - }
|
| -
|
| - rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
|
| - cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
|
| -
|
| - testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
|
| -
|
| - ucol_close(credundant);
|
| - ucol_close(cresulting);
|
| -
|
| - log_verbose("testing using data\n");
|
| -
|
| - genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
|
| - }
|
| -
|
| -}
|
| -
|
| -static void TestExpansionSyntax(void) {
|
| - int32_t i;
|
| -
|
| - const static char *rules[] = {
|
| - "&AE <<< a << b <<< c &d <<< f",
|
| - "&AE <<< a <<< b << c << d < e < f <<< g",
|
| - "&AE <<< B <<< C / D <<< F"
|
| - };
|
| -
|
| - const static char *expectedRules[] = {
|
| - "&A <<< a / E << b / E <<< c /E &d <<< f",
|
| - "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
|
| - "&A <<< B / E <<< C / ED <<< F / E"
|
| - };
|
| -
|
| - const static char *testdata[][8] = {
|
| - {"AE", "a", "b", "c"},
|
| - {"AE", "a", "b", "c", "d", "e", "f", "g"},
|
| - {"AE", "B", "C"} /* / ED <<< F / E"},*/
|
| - };
|
| -
|
| - const static uint32_t testdatalen[] = {
|
| - 4,
|
| - 8,
|
| - 3
|
| - };
|
| -
|
| -
|
| -
|
| - UCollator *credundant = NULL;
|
| - UCollator *cresulting = NULL;
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - UChar rlz[2048] = { 0 };
|
| - uint32_t rlen = 0;
|
| -
|
| - for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
|
| - log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
|
| - rlen = u_unescape(rules[i], rlz, 2048);
|
| -
|
| - credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
|
| - if(status == U_FILE_ACCESS_ERROR) {
|
| - log_data_err("Is your data around?\n");
|
| - return;
|
| - } else if(U_FAILURE(status)) {
|
| - log_err("Error opening collator\n");
|
| - return;
|
| - }
|
| - rlen = u_unescape(expectedRules[i], rlz, 2048);
|
| - cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
|
| -
|
| - /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
|
| - /* as a hard error test, but only in information mode */
|
| - testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
|
| -
|
| - ucol_close(credundant);
|
| - ucol_close(cresulting);
|
| -
|
| - log_verbose("testing using data\n");
|
| -
|
| - genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
|
| - }
|
| -}
|
| -
|
| static void TestCase(void)
|
| {
|
| const static UChar gRules[MAX_TOKEN_LEN] =
|
| @@ -2246,13 +942,13 @@ static void TestCase(void)
|
| };
|
| log_verbose("mixed case test\n");
|
| log_verbose("lower first, case level off\n");
|
| - genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
|
| + genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
|
| log_verbose("upper first, case level off\n");
|
| - genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
|
| + genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
|
| log_verbose("lower first, case level on\n");
|
| - genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
|
| + genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
|
| log_verbose("upper first, case level on\n");
|
| - genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
|
| + genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
|
| }
|
|
|
| }
|
| @@ -2560,25 +1256,32 @@ static void TestHangulTailoring(void) {
|
| log_err("Unable to open collator with rules %s\n", rules);
|
| }
|
|
|
| - log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
|
| - ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */
|
| - genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
|
| -
|
| ucol_close(coll);
|
|
|
| log_verbose("Using ko__LOTUS locale\n");
|
| genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
|
| }
|
|
|
| +/*
|
| + * The secondary/tertiary compression middle byte
|
| + * as used by the current implementation.
|
| + * Subject to change as the sort key compression changes.
|
| + * See class CollationKeys.
|
| + */
|
| +enum {
|
| + SEC_COMMON_MIDDLE = 0x25, /* range 05..45 */
|
| + TER_ONLY_COMMON_MIDDLE = 0x65 /* range 05..C5 */
|
| +};
|
| +
|
| static void TestCompressOverlap(void) {
|
| UChar secstr[150];
|
| UChar tertstr[150];
|
| UErrorCode status = U_ZERO_ERROR;
|
| UCollator *coll;
|
| - char result[200];
|
| + uint8_t result[500];
|
| uint32_t resultlen;
|
| int count = 0;
|
| - char *tempptr;
|
| + uint8_t *tempptr;
|
|
|
| coll = ucol_open("", &status);
|
|
|
| @@ -2598,29 +1301,29 @@ static void TestCompressOverlap(void) {
|
|
|
| /* no compression secstr should have 150 secondary bytes, tertstr should
|
| have 150 tertiary bytes.
|
| - with correct overlapping compression, secstr should have 4 secondary
|
| - bytes, tertstr should have > 2 tertiary bytes */
|
| - resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
|
| + with correct compression, secstr should have 6 secondary
|
| + bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
|
| + resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
|
| (void)resultlen; /* Suppress set but not used warning. */
|
| - tempptr = uprv_strchr(result, 1) + 1;
|
| + tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
|
| while (*(tempptr + 1) != 1) {
|
| /* the last secondary collation element is not checked since it is not
|
| part of the compression */
|
| - if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
|
| - log_err("Secondary compression overlapped\n");
|
| + if (*tempptr < SEC_COMMON_MIDDLE) {
|
| + log_err("Secondary top down compression overlapped\n");
|
| }
|
| tempptr ++;
|
| }
|
|
|
| /* tertiary top/bottom/common for en_US is similar to the secondary
|
| top/bottom/common */
|
| - resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
|
| - tempptr = uprv_strrchr(result, 1) + 1;
|
| + resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
|
| + tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
|
| while (*(tempptr + 1) != 0) {
|
| /* the last secondary collation element is not checked since it is not
|
| part of the compression */
|
| - if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
|
| - log_err("Tertiary compression overlapped\n");
|
| + if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
|
| + log_err("Tertiary top down compression overlapped\n");
|
| }
|
| tempptr ++;
|
| }
|
| @@ -2628,26 +1331,26 @@ static void TestCompressOverlap(void) {
|
| /* bottom up compression ------------------------------------- */
|
| secstr[count] = 0;
|
| tertstr[count] = 0;
|
| - resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
|
| - tempptr = uprv_strchr(result, 1) + 1;
|
| + resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
|
| + tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
|
| while (*(tempptr + 1) != 1) {
|
| /* the last secondary collation element is not checked since it is not
|
| part of the compression */
|
| - if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
|
| - log_err("Secondary compression overlapped\n");
|
| + if (*tempptr > SEC_COMMON_MIDDLE) {
|
| + log_err("Secondary bottom up compression overlapped\n");
|
| }
|
| tempptr ++;
|
| }
|
|
|
| /* tertiary top/bottom/common for en_US is similar to the secondary
|
| top/bottom/common */
|
| - resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
|
| - tempptr = uprv_strrchr(result, 1) + 1;
|
| + resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
|
| + tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
|
| while (*(tempptr + 1) != 0) {
|
| /* the last secondary collation element is not checked since it is not
|
| part of the compression */
|
| - if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
|
| - log_err("Tertiary compression overlapped\n");
|
| + if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
|
| + log_err("Tertiary bottom up compression overlapped\n");
|
| }
|
| tempptr ++;
|
| }
|
| @@ -2707,6 +1410,13 @@ static void TestContraction(void) {
|
| {0x0063 /* 'c' */, 0x0068 /* 'h' */},
|
| {0x0063 /* 'c' */, 0x006C /* 'l' */}
|
| };
|
| +#if 0
|
| + /*
|
| + * These pairs of rule strings are not guaranteed to yield the very same mappings.
|
| + * In fact, LDML 24 recommends an improved way of creating mappings
|
| + * which always yields different mappings for such pairs. See
|
| + * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
|
| + */
|
| const static char *testrules3[] = {
|
| "&z < xyz &xyzw << B",
|
| "&z < xyz &xyz << B / w",
|
| @@ -2717,6 +1427,7 @@ static void TestContraction(void) {
|
| "&a\\ud800\\udc00m << B",
|
| "&a << B / \\ud800\\udc00m",
|
| };
|
| +#endif
|
|
|
| UErrorCode status = U_ZERO_ERROR;
|
| UCollator *coll;
|
| @@ -2782,8 +1493,9 @@ static void TestContraction(void) {
|
| return;
|
| }
|
| ucol_close(coll);
|
| -
|
| +#if 0 /* see above */
|
| for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
|
| + log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
|
| UCollator *coll1,
|
| *coll2;
|
| UCollationElements *iter1,
|
| @@ -2810,8 +1522,11 @@ static void TestContraction(void) {
|
| return;
|
| }
|
| while (ce != UCOL_NULLORDER) {
|
| - if (ce != (uint32_t)ucol_next(iter2, &status)) {
|
| - log_err("CEs does not match\n");
|
| + uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
|
| + if (ce == ce2) {
|
| + log_verbose("CEs match: %08x\n", ce);
|
| + } else {
|
| + log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
|
| return;
|
| }
|
| ce = ucol_next(iter1, &status);
|
| @@ -2829,11 +1544,23 @@ static void TestContraction(void) {
|
| ucol_close(coll1);
|
| ucol_close(coll2);
|
| }
|
| +#endif
|
| }
|
|
|
| static void TestExpansion(void) {
|
| const static char *testrules[] = {
|
| +#if 0
|
| + /*
|
| + * This seems to have tested that M was not mapped to an expansion.
|
| + * I believe the old builder just did that because it computed the extension CEs
|
| + * at the very end, which was a bug.
|
| + * Among other problems, it violated the core tailoring principle
|
| + * by making an earlier rule depend on a later one.
|
| + * And, of course, if M did not get an expansion, then it was primary different from K,
|
| + * unlike what the rule &K<<M says.
|
| + */
|
| "&J << K / B & K << M",
|
| +#endif
|
| "&J << K / B << M"
|
| };
|
| const static UChar testdata[][3] = {
|
| @@ -2983,207 +1710,81 @@ static void TestBocsuCoverage(void) {
|
|
|
| static void TestVariableTopSetting(void) {
|
| UErrorCode status = U_ZERO_ERROR;
|
| - const UChar *current = NULL;
|
| uint32_t varTopOriginal = 0, varTop1, varTop2;
|
| UCollator *coll = ucol_open("", &status);
|
| if(U_SUCCESS(status)) {
|
|
|
| - uint32_t strength = 0;
|
| - uint16_t specs = 0;
|
| - uint32_t chOffset = 0;
|
| - uint32_t chLen = 0;
|
| - uint32_t exOffset = 0;
|
| - uint32_t exLen = 0;
|
| - uint32_t oldChOffset = 0;
|
| - uint32_t oldChLen = 0;
|
| - uint32_t oldExOffset = 0;
|
| - uint32_t oldExLen = 0;
|
| - uint32_t prefixOffset = 0;
|
| - uint32_t prefixLen = 0;
|
| -
|
| - UBool startOfRules = TRUE;
|
| - UColTokenParser src;
|
| - UColOptionSet opts;
|
| -
|
| - UChar *rulesCopy = NULL;
|
| - uint32_t rulesLen;
|
| -
|
| - UCollationResult result;
|
| + static const UChar nul = 0;
|
| + static const UChar space = 0x20;
|
| + static const UChar dot = 0x2e; /* punctuation */
|
| + static const UChar degree = 0xb0; /* symbol */
|
| + static const UChar dollar = 0x24; /* currency symbol */
|
| + static const UChar zero = 0x30; /* digit */
|
|
|
| - UChar first[256] = { 0 };
|
| - UChar second[256] = { 0 };
|
| - UParseError parseError;
|
| - int32_t myQ = getTestOption(QUICK_OPTION);
|
| -
|
| - (void)prefixLen; /* Suppress set but not used warnings. */
|
| - (void)prefixOffset;
|
| - (void)specs;
|
| -
|
| - uprv_memset(&src, 0, sizeof(UColTokenParser));
|
| -
|
| - src.opts = &opts;
|
| + varTopOriginal = ucol_getVariableTop(coll, &status);
|
| + log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
|
| + ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
|
|
|
| - if(getTestOption(QUICK_OPTION) <= 0) {
|
| - setTestOption(QUICK_OPTION, 1);
|
| + varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
|
| + varTop2 = ucol_getVariableTop(coll, &status);
|
| + log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
|
| + if(U_FAILURE(status) || varTop1 != varTop2 ||
|
| + !ucol_equal(coll, &nul, 0, &space, 1) ||
|
| + ucol_equal(coll, &nul, 0, &dot, 1) ||
|
| +                ucol_equal(coll, &nul, 0, &degree, 1) ||
|
| + ucol_equal(coll, &nul, 0, &dollar, 1) ||
|
| + ucol_equal(coll, &nul, 0, &zero, 1) ||
|
| + ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
|
| + log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
|
| }
|
|
|
| - /* this test will fail when normalization is turned on */
|
| - /* therefore we always turn off exhaustive mode for it */
|
| - { /* QUICK > 0*/
|
| - log_verbose("Slide variable top over UCARules\n");
|
| - rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
|
| - rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
|
| - rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
|
| -
|
| - if(U_SUCCESS(status) && rulesLen > 0) {
|
| - ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
|
| - src.current = src.source = rulesCopy;
|
| - src.end = rulesCopy+rulesLen;
|
| - src.extraCurrent = src.end;
|
| - src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
| -
|
| - /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
|
| - the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
|
| - while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
|
| - strength = src.parsedToken.strength;
|
| - chOffset = src.parsedToken.charsOffset;
|
| - chLen = src.parsedToken.charsLen;
|
| - exOffset = src.parsedToken.extensionOffset;
|
| - exLen = src.parsedToken.extensionLen;
|
| - prefixOffset = src.parsedToken.prefixOffset;
|
| - prefixLen = src.parsedToken.prefixLen;
|
| - specs = src.parsedToken.flags;
|
| -
|
| - startOfRules = FALSE;
|
| - {
|
| - log_verbose("%04X %d ", *(src.source+chOffset), chLen);
|
| - }
|
| - if(strength == UCOL_PRIMARY) {
|
| - status = U_ZERO_ERROR;
|
| - varTopOriginal = ucol_getVariableTop(coll, &status);
|
| - varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
|
| - if(U_FAILURE(status)) {
|
| - char buffer[256];
|
| - char *buf = buffer;
|
| - uint32_t i = 0, j;
|
| - uint32_t CE = UCOL_NO_MORE_CES;
|
| -
|
| - /* before we start screaming, let's see if there is a problem with the rules */
|
| - UErrorCode collIterateStatus = U_ZERO_ERROR;
|
| - collIterate *s = uprv_new_collIterate(&collIterateStatus);
|
| - uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
|
| -
|
| - CE = ucol_getNextCE(coll, s, &status);
|
| - (void)CE; /* Suppress set but not used warning. */
|
| -
|
| - for(i = 0; i < oldChLen; i++) {
|
| - j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
|
| - buf += j;
|
| - }
|
| - if(status == U_PRIMARY_TOO_LONG_ERROR) {
|
| - log_verbose("= Expected failure for %s =", buffer);
|
| - } else {
|
| - if(uprv_collIterateAtEnd(s)) {
|
| - log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
|
| - oldChOffset, u_errorName(status), buffer);
|
| - } else {
|
| - log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
|
| - buffer);
|
| - }
|
| - }
|
| - uprv_delete_collIterate(s);
|
| - }
|
| - varTop2 = ucol_getVariableTop(coll, &status);
|
| - if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
|
| - log_err("cannot retrieve set varTop value!\n");
|
| - continue;
|
| - }
|
| + varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
|
| + varTop2 = ucol_getVariableTop(coll, &status);
|
| + log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
|
| + if(U_FAILURE(status) || varTop1 != varTop2 ||
|
| + !ucol_equal(coll, &nul, 0, &space, 1) ||
|
| + !ucol_equal(coll, &nul, 0, &dot, 1) ||
|
| +                ucol_equal(coll, &nul, 0, &degree, 1) ||
|
| + ucol_equal(coll, &nul, 0, &dollar, 1) ||
|
| + ucol_equal(coll, &nul, 0, &zero, 1) ||
|
| +                ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
|
| + log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
|
| + }
|
|
|
| - if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
|
| -
|
| - u_strncpy(first, src.source+oldChOffset, oldChLen);
|
| - u_strncpy(first+oldChLen, src.source+chOffset, chLen);
|
| - u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
|
| - first[2*oldChLen+chLen] = 0;
|
| -
|
| - if(oldExLen == 0) {
|
| - u_strncpy(second, src.source+chOffset, chLen);
|
| - second[chLen] = 0;
|
| - } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
|
| - u_strncpy(second, src.source+oldExOffset, oldExLen);
|
| - u_strncpy(second+oldChLen, src.source+chOffset, chLen);
|
| - u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
|
| - second[2*oldExLen+chLen] = 0;
|
| - }
|
| - result = ucol_strcoll(coll, first, -1, second, -1);
|
| - if(result == UCOL_EQUAL) {
|
| - doTest(coll, first, second, UCOL_EQUAL);
|
| - } else {
|
| - log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
|
| - }
|
| - }
|
| - }
|
| - if(strength != UCOL_TOK_RESET) {
|
| - oldChOffset = chOffset;
|
| - oldChLen = chLen;
|
| - oldExOffset = exOffset;
|
| - oldExLen = exLen;
|
| - }
|
| - }
|
| - status = U_ZERO_ERROR;
|
| - }
|
| - else {
|
| - log_err("Unexpected failure getting rules %s\n", u_errorName(status));
|
| - return;
|
| - }
|
| - if (U_FAILURE(status)) {
|
| - log_err("Error parsing rules %s\n", u_errorName(status));
|
| - return;
|
| - }
|
| - status = U_ZERO_ERROR;
|
| +        varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
|
| + varTop2 = ucol_getVariableTop(coll, &status);
|
| + log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
|
| + if(U_FAILURE(status) || varTop1 != varTop2 ||
|
| + !ucol_equal(coll, &nul, 0, &space, 1) ||
|
| + !ucol_equal(coll, &nul, 0, &dot, 1) ||
|
| +                !ucol_equal(coll, &nul, 0, &degree, 1) ||
|
| + ucol_equal(coll, &nul, 0, &dollar, 1) ||
|
| + ucol_equal(coll, &nul, 0, &zero, 1) ||
|
| +                ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
|
| + log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
|
| }
|
|
|
| - setTestOption(QUICK_OPTION, myQ);
|
| + varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
|
| + varTop2 = ucol_getVariableTop(coll, &status);
|
| + log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
|
| + if(U_FAILURE(status) || varTop1 != varTop2 ||
|
| + !ucol_equal(coll, &nul, 0, &space, 1) ||
|
| + !ucol_equal(coll, &nul, 0, &dot, 1) ||
|
| + !ucol_equal(coll, &nul, 0, &degree, 1) ||
|
| + !ucol_equal(coll, &nul, 0, &dollar, 1) ||
|
| + ucol_equal(coll, &nul, 0, &zero, 1) ||
|
| + ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
|
| + log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
|
| + }
|
|
|
| log_verbose("Testing setting variable top to contractions\n");
|
| {
|
| - UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
|
| - int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
|
| - while(*conts != 0) {
|
| - /*
|
| - * A continuation is NUL-terminated and NUL-padded
|
| - * except if it has the maximum length.
|
| - */
|
| - int32_t contractionLength = maxUCAContractionLength;
|
| - while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
|
| - --contractionLength;
|
| - }
|
| - if(*(conts+1)==0) { /* pre-context */
|
| - varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
|
| - } else {
|
| - varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
|
| - }
|
| - if(U_FAILURE(status)) {
|
| - if(status == U_PRIMARY_TOO_LONG_ERROR) {
|
| - /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
|
| - * therefore it is not an error when it complains about them. */
|
| - log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
|
| - *conts, *(conts+1), *(conts+2));
|
| - } else {
|
| - log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
|
| - *conts, *(conts+1), *(conts+2), u_errorName(status));
|
| - }
|
| - status = U_ZERO_ERROR;
|
| - }
|
| - conts+=maxUCAContractionLength;
|
| - }
|
| -
|
| - status = U_ZERO_ERROR;
|
| -
|
| + UChar first[4] = { 0 };
|
| first[0] = 0x0040;
|
| first[1] = 0x0050;
|
| first[2] = 0x0000;
|
|
|
| + status = U_ZERO_ERROR;
|
| ucol_setVariableTop(coll, first, -1, &status);
|
|
|
| if(U_SUCCESS(status)) {
|
| @@ -3203,21 +1804,110 @@ static void TestVariableTopSetting(void) {
|
| log_verbose("Testing calling with error set\n");
|
|
|
| status = U_INTERNAL_PROGRAM_ERROR;
|
| - varTop1 = ucol_setVariableTop(coll, first, 1, &status);
|
| + varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
|
| varTop2 = ucol_getVariableTop(coll, &status);
|
| ucol_restoreVariableTop(coll, varTop2, &status);
|
| - varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
|
| + varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
|
| varTop2 = ucol_getVariableTop(NULL, &status);
|
| ucol_restoreVariableTop(NULL, varTop2, &status);
|
| if(status != U_INTERNAL_PROGRAM_ERROR) {
|
| log_err("Bad reaction to passed error!\n");
|
| }
|
| - uprv_free(src.source);
|
| ucol_close(coll);
|
| } else {
|
| log_data_err("Couldn't open UCA collator\n");
|
| }
|
| +}
|
| +
|
| +static void TestMaxVariable() {
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + UColReorderCode oldMax, max;
|
| + UCollator *coll;
|
| +
|
| + static const UChar nul = 0;
|
| + static const UChar space = 0x20;
|
| + static const UChar dot = 0x2e; /* punctuation */
|
| + static const UChar degree = 0xb0; /* symbol */
|
| + static const UChar dollar = 0x24; /* currency symbol */
|
| + static const UChar zero = 0x30; /* digit */
|
| +
|
| + coll = ucol_open("", &status);
|
| + if(U_FAILURE(status)) {
|
| + log_data_err("Couldn't open root collator\n");
|
| + return;
|
| + }
|
| +
|
| + oldMax = ucol_getMaxVariable(coll);
|
| + log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
|
| + ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
|
| +
|
| + ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
|
| + max = ucol_getMaxVariable(coll);
|
| + log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
|
| + if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
|
| + !ucol_equal(coll, &nul, 0, &space, 1) ||
|
| + ucol_equal(coll, &nul, 0, &dot, 1) ||
|
| + ucol_equal(coll, &nul, 0, &degree, 1) ||
|
| + ucol_equal(coll, &nul, 0, &dollar, 1) ||
|
| + ucol_equal(coll, &nul, 0, &zero, 1) ||
|
| + ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
|
| + log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
|
| + }
|
| +
|
| + ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
|
| + max = ucol_getMaxVariable(coll);
|
| + log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
|
| + if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
|
| + !ucol_equal(coll, &nul, 0, &space, 1) ||
|
| + !ucol_equal(coll, &nul, 0, &dot, 1) ||
|
| + ucol_equal(coll, &nul, 0, &degree, 1) ||
|
| + ucol_equal(coll, &nul, 0, &dollar, 1) ||
|
| + ucol_equal(coll, &nul, 0, &zero, 1) ||
|
| + ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
|
| + log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
|
| + }
|
| +
|
| + ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
|
| + max = ucol_getMaxVariable(coll);
|
| + log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
|
| + if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
|
| + !ucol_equal(coll, &nul, 0, &space, 1) ||
|
| + !ucol_equal(coll, &nul, 0, &dot, 1) ||
|
| + !ucol_equal(coll, &nul, 0, &degree, 1) ||
|
| + ucol_equal(coll, &nul, 0, &dollar, 1) ||
|
| + ucol_equal(coll, &nul, 0, &zero, 1) ||
|
| + ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
|
| + log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
|
| + }
|
| +
|
| + ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
|
| + max = ucol_getMaxVariable(coll);
|
| + log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
|
| + if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
|
| + !ucol_equal(coll, &nul, 0, &space, 1) ||
|
| + !ucol_equal(coll, &nul, 0, &dot, 1) ||
|
| + !ucol_equal(coll, &nul, 0, &degree, 1) ||
|
| + !ucol_equal(coll, &nul, 0, &dollar, 1) ||
|
| + ucol_equal(coll, &nul, 0, &zero, 1) ||
|
| + ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
|
| + log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
|
| + }
|
| +
|
| + log_verbose("Test restoring maxVariable\n");
|
| + status = U_ZERO_ERROR;
|
| + ucol_setMaxVariable(coll, oldMax, &status);
|
| + if(oldMax != ucol_getMaxVariable(coll)) {
|
| + log_err("Couldn't restore old maxVariable\n");
|
| + }
|
|
|
| + log_verbose("Testing calling with error set\n");
|
| + status = U_INTERNAL_PROGRAM_ERROR;
|
| + ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
|
| + max = ucol_getMaxVariable(coll);
|
| + if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
|
| + log_err("Bad reaction to passed error!\n");
|
| + }
|
| + ucol_close(coll);
|
| }
|
|
|
| static void TestNonChars(void) {
|
| @@ -3702,6 +2392,8 @@ static void TestRuleOptions(void) {
|
| const char *data[10];
|
| const uint32_t len;
|
| } tests[] = {
|
| +#if 0
|
| + /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
|
| /* - all befores here amount to zero */
|
| { "&[before 3][first tertiary ignorable]<<<a",
|
| { "\\u0000", "a"}, 2
|
| @@ -3710,25 +2402,35 @@ static void TestRuleOptions(void) {
|
| { "&[before 3][last tertiary ignorable]<<<a",
|
| { "\\u0000", "a"}, 2
|
| }, /* you cannot go before last tertiary ignorable */
|
| -
|
| +#endif
|
| + /*
|
| + * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
|
| + * and it *is* possible to "go before" that.
|
| + */
|
| { "&[before 3][first secondary ignorable]<<<a",
|
| { "\\u0000", "a"}, 2
|
| - }, /* you cannot go before first secondary ignorable */
|
| + },
|
|
|
| { "&[before 3][last secondary ignorable]<<<a",
|
| { "\\u0000", "a"}, 2
|
| - }, /* you cannot go before first secondary ignorable */
|
| + },
|
|
|
| /* 'normal' befores */
|
|
|
| - { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
|
| + /*
|
| + * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
|
| + * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
|
| + * because there is no tailoring space before that boundary.
|
| + * Made the tests work by tailoring to a space instead.
|
| + */
|
| + { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */
|
| { "c", "b", "\\u0332", "a" }, 4
|
| },
|
|
|
| /* we don't have a code point that corresponds to
|
| * the last primary ignorable
|
| */
|
| - { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
|
| + { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */
|
| { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
|
| },
|
|
|
| @@ -3754,14 +2456,14 @@ static void TestRuleOptions(void) {
|
| "&[first implicit]<a",
|
| { "b", "\\u4e00", "a", "\\u4e01"}, 4
|
| },
|
| -
|
| +#if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
|
| { "&[before 1][last implicit]<b"
|
| "&[last implicit]<a",
|
| { "b", "\\U0010FFFD", "a" }, 3
|
| },
|
| -
|
| +#endif
|
| { "&[last variable]<z"
|
| - "&[last primary ignorable]<x"
|
| + "&' '<x" /* was &[last primary ignorable]<x, see above */
|
| "&[last secondary ignorable]<<y"
|
| "&[last tertiary ignorable]<<<w"
|
| "&[top]<u",
|
| @@ -4007,7 +2709,7 @@ static void TestPartialSortKeyTermination(void) {
|
| "\\udc00\\ud800\\ud800"
|
| };
|
|
|
| - int32_t i = sizeof(UCollator);
|
| + int32_t i;
|
|
|
| UErrorCode status = U_ZERO_ERROR;
|
|
|
| @@ -4081,7 +2783,7 @@ static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCo
|
| errorNo++;
|
| }
|
| ucol_close(target);
|
| - if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
|
| + if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
|
| target = ucol_safeClone(source, NULL, NULL, &status);
|
| if(U_FAILURE(status)) {
|
| log_err("Error creating clone\n");
|
| @@ -4116,7 +2818,8 @@ static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCo
|
| errorNo++;
|
| return errorNo;
|
| }
|
| - if(!ucol_equals(source, target)) {
|
| + /* Note: The tailoring rule string is an optional data item. */
|
| + if(!ucol_equals(source, target) && sourceRulesLen != 0) {
|
| log_err("Collator different from collator that was created from the same rules\n");
|
| errorNo++;
|
| }
|
| @@ -4128,7 +2831,7 @@ static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCo
|
|
|
| static void TestEquals(void) {
|
| /* ucol_equals is not currently a public API. There is a chance that it will become
|
| - * something like this, but currently it is only used by RuleBasedCollator::operator==
|
| + * something like this.
|
| */
|
| /* test whether the two collators instantiated from the same locale are equal */
|
| UErrorCode status = U_ZERO_ERROR;
|
| @@ -4183,8 +2886,8 @@ static void TestEquals(void) {
|
| if(!ucol_equals(source, source)) {
|
| log_err("Same collator not equal\n");
|
| }
|
| - if(TestEqualsForCollator(locName, source, target)) {
|
| - log_err("Errors for root\n", locName);
|
| + if(TestEqualsForCollator("root", source, target)) {
|
| + log_err("Errors for root\n");
|
| }
|
| ucol_close(source);
|
|
|
| @@ -4399,83 +3102,6 @@ static void TestPinyinProblem(void) {
|
| genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
|
| }
|
|
|
| -#define TST_UCOL_MAX_INPUT 0x220001
|
| -#define topByte 0xFF000000;
|
| -#define bottomByte 0xFF;
|
| -#define fourBytes 0xFFFFFFFF;
|
| -
|
| -
|
| -static void showImplicit(UChar32 i) {
|
| - if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
|
| - log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
|
| - }
|
| -}
|
| -
|
| -static void TestImplicitGeneration(void) {
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - UChar32 last = 0;
|
| - UChar32 current;
|
| - UChar32 i = 0, j = 0;
|
| - UChar32 roundtrip = 0;
|
| - UChar32 lastBottom = 0;
|
| - UChar32 currentBottom = 0;
|
| - UChar32 lastTop = 0;
|
| - UChar32 currentTop = 0;
|
| -
|
| - UCollator *coll = ucol_open("root", &status);
|
| - if(U_FAILURE(status)) {
|
| - log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
|
| - return;
|
| - }
|
| -
|
| - uprv_uca_getRawFromImplicit(0xE20303E7);
|
| -
|
| - for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
|
| - current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
|
| -
|
| - /* check that it round-trips AND that all intervening ones are illegal*/
|
| - roundtrip = uprv_uca_getRawFromImplicit(current);
|
| - if (roundtrip != i) {
|
| - log_err("No roundtrip %08X\n", i);
|
| - }
|
| - if (last != 0) {
|
| - for (j = last + 1; j < current; ++j) {
|
| - roundtrip = uprv_uca_getRawFromImplicit(j);
|
| - /* raise an error if it *doesn't* find an error*/
|
| - if (roundtrip != -1) {
|
| - log_err("Fails to recognize illegal %08X\n", j);
|
| - }
|
| - }
|
| - }
|
| - /* now do other consistency checks*/
|
| - lastBottom = last & bottomByte;
|
| - currentBottom = current & bottomByte;
|
| - lastTop = last & topByte;
|
| - currentTop = current & topByte;
|
| - (void)lastBottom; /* Suppress set but not used warnings. */
|
| - (void)currentBottom;
|
| -
|
| - /* print out some values for spot-checking*/
|
| - if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
|
| - showImplicit(i-3);
|
| - showImplicit(i-2);
|
| - showImplicit(i-1);
|
| - showImplicit(i);
|
| - showImplicit(i+1);
|
| - showImplicit(i+2);
|
| - }
|
| - last = current;
|
| -
|
| - if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
|
| - log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
|
| - }
|
| - }
|
| - showImplicit(TST_UCOL_MAX_INPUT-2);
|
| - showImplicit(TST_UCOL_MAX_INPUT-1);
|
| - showImplicit(TST_UCOL_MAX_INPUT);
|
| - ucol_close(coll);
|
| -}
|
| -
|
| /**
|
| * Iterate through the given iterator, checking to see that all the strings
|
| * in the expected array are present.
|
| @@ -4602,8 +3228,8 @@ ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
|
| &isAvailable, &ec);
|
| if (assertSuccess("getFunctionalEquivalent", &ec)) {
|
| assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
|
| - assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
|
| - isAvailable == TRUE);
|
| + assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
|
| + isAvailable == FALSE);
|
| }
|
| }
|
|
|
| @@ -4955,9 +3581,20 @@ TestVI5913(void)
|
| UCollator *coll =NULL;
|
| uint8_t resColl[100], expColl[100];
|
| int32_t rLen, tLen, ruleLen, sLen, kLen;
|
| - UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/
|
| + UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/
|
| UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
|
| - UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/
|
| + /*
|
| + * Note: Just tailoring &z<ae^ does not work as expected:
|
| + * The UCA spec requires for discontiguous contractions that they
|
| + * extend an *existing match* by one combining mark at a time.
|
| + * Therefore, ae must be a contraction so that the builder finds
|
| + * discontiguous contractions for ae^, for example with an intervening underdot.
|
| + * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
|
| + */
|
| + UChar rule3[256]={
|
| + 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */
|
| + 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/
|
| + 0};
|
| static const UChar tData[][20]={
|
| {0x1EAC, 0},
|
| {0x0041, 0x0323, 0x0302, 0},
|
| @@ -5098,18 +3735,22 @@ TestVI5913(void)
|
| coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
|
| tLen = u_strlen(tailorData3[3]);
|
| kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
|
| + log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
|
| + for(i = 0; i<kLen; i++) {
|
| + log_verbose(" %02X", expColl[i]);
|
| + }
|
| for (j=4; j<6; j++) {
|
| tLen = u_strlen(tailorData3[j]);
|
| rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
|
|
|
| if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
|
| - log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
|
| + log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
|
| for(i = 0; i<rLen; i++) {
|
| log_err(" %02X", resColl[i]);
|
| }
|
| }
|
|
|
| - log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
|
| + log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
|
| for(i = 0; i<rLen; i++) {
|
| log_verbose(" %02X", resColl[i]);
|
| }
|
| @@ -5153,11 +3794,15 @@ TestTailor6179(void)
|
| /*
|
| * These values from FractionalUCA.txt will change,
|
| * and need to be updated here.
|
| + * TODO: Make this not check for particular sort keys.
|
| + * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
|
| */
|
| - static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
|
| - static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
|
| - static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
|
| - static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
|
| + static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
|
| + static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
|
| + static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
|
| + static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
|
| +
|
| + UParseError parseError;
|
|
|
| /* Test [Last Primary ignorable] */
|
|
|
| @@ -5191,10 +3836,12 @@ TestTailor6179(void)
|
|
|
| /* Test [Last Secondary ignorable] */
|
| log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
|
| - ruleLen = u_strlen(rule1);
|
| - coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
|
| + ruleLen = u_strlen(rule2);
|
| + coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
|
| if (U_FAILURE(status)) {
|
| log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
|
| + log_info(" offset=%d \"%s\" | \"%s\"\n",
|
| + parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
|
| return;
|
| }
|
| tLen = u_strlen(tData2[0]);
|
| @@ -5206,16 +3853,14 @@ TestTailor6179(void)
|
| }
|
| log_err("\n");
|
| }
|
| - if(!log_knownIssue("8982", "debug and fix")) { /* TODO: debug & fix, see ticket #8982 */
|
| - tLen = u_strlen(tData2[1]);
|
| - rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
|
| - if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
|
| - log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
|
| - for(i = 0; i<rLen; i++) {
|
| - log_err(" %02X", resColl[i]);
|
| - }
|
| - log_err("\n");
|
| + tLen = u_strlen(tData2[1]);
|
| + rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
|
| + if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
|
| + log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
|
| + for(i = 0; i<rLen; i++) {
|
| + log_err(" %02X", resColl[i]);
|
| }
|
| + log_err("\n");
|
| }
|
| ucol_close(coll);
|
| }
|
| @@ -5582,6 +4227,10 @@ static void doTestOneTestCase(const OneTestCase testcases[],
|
| myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
|
| if(U_FAILURE(status)){
|
| log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
|
| + log_info(" offset=%d \"%s\" | \"%s\"\n",
|
| + parse_error.offset,
|
| + aescstrdup(parse_error.preContext, -1),
|
| + aescstrdup(parse_error.postContext, -1));
|
| return;
|
| }
|
| log_verbose("Testing the <<* syntax\n");
|
| @@ -5627,13 +4276,13 @@ const static OneTestCase rangeTestcases[] = {
|
| static int nRangeTestcases = LEN(rangeTestcases);
|
|
|
| const static OneTestCase rangeTestcasesSupplemental[] = {
|
| - { {0xfffe}, {0xffff}, UCOL_LESS }, /* U+FFFE < U+FFFF */
|
| - { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFF < U+10000 */
|
| + { {0x4e00}, {0xfffb}, UCOL_LESS }, /* U+4E00 < U+FFFB */
|
| + { {0xfffb}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFB < U+10000 */
|
| { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
|
| - { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+FFFE < U+10001 */
|
| + { {0x4e00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+4E00 < U+10001 */
|
| { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
|
| { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
|
| - { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+FFFE < U+10001 */
|
| + { {0x4e00}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+4E00 < U+10002 */
|
| };
|
|
|
| static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
|
| @@ -5690,10 +4339,10 @@ static void TestSameStrengthListQuoted(void)
|
| static void TestSameStrengthListSupplemental(void)
|
| {
|
| const char* strRules[] = {
|
| - "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
|
| - "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
|
| - "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
|
| - "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
|
| + "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
|
| + "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
|
| + "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
|
| + "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
|
| };
|
| doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
|
| }
|
| @@ -5741,7 +4390,8 @@ static void TestSameStrengthListRanges(void)
|
| static void TestSameStrengthListSupplementalRanges(void)
|
| {
|
| const char* strRules[] = {
|
| - "&\\ufffe<*\\uffff-\\U00010002",
|
| + /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
|
| + "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
|
| };
|
| doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
|
| }
|
| @@ -6041,6 +4691,7 @@ static void TestReorderingAPI(void)
|
| int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
|
| int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
|
| int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
|
| + int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
|
| UCollationResult collResult;
|
| int32_t retrievedReorderCodesLength;
|
| int32_t retrievedReorderCodes[10];
|
| @@ -6118,6 +4769,22 @@ static void TestReorderingAPI(void)
|
| return;
|
| }
|
|
|
| + /* clear the reordering using [NONE] */
|
| + ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
|
| + if (U_FAILURE(status)) {
|
| + log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
|
| + return;
|
| + }
|
| +
|
| + /* get the reordering again */
|
| + retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
|
| + if (retrievedReorderCodesLength != 0) {
|
| + log_err_status(status,
|
| + "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
|
| + retrievedReorderCodesLength);
|
| + return;
|
| + }
|
| +
|
| /* test for error condition on duplicate reorder codes */
|
| ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
|
| if (!U_FAILURE(status)) {
|
| @@ -6272,17 +4939,22 @@ static void TestReorderingAPIWithRuleCreatedCollator(void)
|
| ucol_close(myCollation);
|
| }
|
|
|
| -static int compareUScriptCodes(const void * a, const void * b)
|
| -{
|
| - return ( *(int32_t*)a - *(int32_t*)b );
|
| +static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
|
| + int32_t i;
|
| + for (i = 0; i < length; ++i) {
|
| + if (expectedScript == scripts[i]) { return TRUE; }
|
| + }
|
| + return FALSE;
|
| }
|
|
|
| static void TestEquivalentReorderingScripts(void) {
|
| UErrorCode status = U_ZERO_ERROR;
|
| - int32_t equivalentScripts[50];
|
| - int32_t equivalentScriptsLength;
|
| - int loopIndex;
|
| - int32_t equivalentScriptsResult[] = {
|
| + int32_t equivalentScripts[100];
|
| + int32_t length;
|
| + int i;
|
| + int32_t prevScript;
|
| + /* At least these scripts are expected to be equivalent. There may be more. */
|
| + static const int32_t expectedScripts[] = {
|
| USCRIPT_BOPOMOFO,
|
| USCRIPT_LISU,
|
| USCRIPT_LYCIAN,
|
| @@ -6311,46 +4983,49 @@ static void TestEquivalentReorderingScripts(void) {
|
| USCRIPT_MEROITIC_HIEROGLYPHS
|
| };
|
|
|
| - qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
|
| -
|
| /* UScript.GOTHIC */
|
| - equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
|
| + length = ucol_getEquivalentReorderCodes(
|
| + USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
|
| if (U_FAILURE(status)) {
|
| - log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
|
| + log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
|
| return;
|
| }
|
| - /*
|
| - fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
|
| - fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
|
| - for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
|
| - fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
|
| + if (length < LEN(expectedScripts)) {
|
| + log_err("ERROR/Gothic: retrieved equivalent script length wrong: "
|
| + "expected at least %d, was = %d\n",
|
| + LEN(expectedScripts), length);
|
| }
|
| - */
|
| - if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
|
| - log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
|
| - return;
|
| + prevScript = -1;
|
| + for (i = 0; i < length; ++i) {
|
| + int32_t script = equivalentScripts[i];
|
| + if (script <= prevScript) {
|
| + log_err("ERROR/Gothic: equivalent scripts out of order at index %d\n", i);
|
| + }
|
| + prevScript = script;
|
| }
|
| - for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
|
| - if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
|
| - log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
|
| - return;
|
| + for (i = 0; i < LEN(expectedScripts); i++) {
|
| + if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
|
| + log_err("ERROR/Gothic: equivalent scripts do not contain %d\n",
|
| + expectedScripts[i]);
|
| }
|
| }
|
|
|
| /* UScript.SHAVIAN */
|
| - equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
|
| + length = ucol_getEquivalentReorderCodes(
|
| + USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
|
| if (U_FAILURE(status)) {
|
| - log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
|
| + log_err_status(status, "ERROR/Shavian: retrieving equivalent reorder codes: %s\n", myErrorName(status));
|
| return;
|
| }
|
| - if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
|
| - log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
|
| - return;
|
| + if (length < LEN(expectedScripts)) {
|
| + log_err("ERROR/Shavian: retrieved equivalent script length wrong: "
|
| + "expected at least %d, was = %d\n",
|
| + LEN(expectedScripts), length);
|
| }
|
| - for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
|
| - if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
|
| - log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
|
| - return;
|
| + for (i = 0; i < LEN(expectedScripts); i++) {
|
| + if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
|
| + log_err("ERROR/Shavian: equivalent scripts do not contain %d\n",
|
| + expectedScripts[i]);
|
| }
|
| }
|
| }
|
| @@ -6854,6 +5529,11 @@ static void TestImport(void)
|
| }
|
|
|
| virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
|
| + if(viruleslength == 0) {
|
| + log_data_err("missing vi tailoring rule string\n");
|
| + ucol_close(vicoll);
|
| + return;
|
| + }
|
| escoll = ucol_open("es", &status);
|
| esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
|
| viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
|
| @@ -6953,6 +5633,11 @@ static void TestImportWithType(void)
|
| return;
|
| }
|
| virules = ucol_getRules(vicoll, &viruleslength);
|
| + if(viruleslength == 0) {
|
| + log_data_err("missing vi tailoring rule string\n");
|
| + ucol_close(vicoll);
|
| + return;
|
| + }
|
| /* decoll = ucol_open("de@collation=phonebook", &status); */
|
| decoll = ucol_open("de-u-co-phonebk", &status);
|
| if(U_FAILURE(status)){
|
| @@ -7076,7 +5761,7 @@ static const LongUpperStrItem longUpperStrItems[] = {
|
| { NULL, 0 }
|
| };
|
|
|
| -enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
|
| +enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
|
|
|
| /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
|
| static void TestCaseLevelBufferOverflow(void)
|
| @@ -7114,6 +5799,38 @@ static void TestCaseLevelBufferOverflow(void)
|
| }
|
| }
|
|
|
| +/* Test for #10595 */
|
| +static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
|
| +#define KEY_PART_SIZE 16
|
| +
|
| +static void TestNextSortKeyPartJaIdentical(void)
|
| +{
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + UCollator *coll;
|
| + uint8_t keyPart[KEY_PART_SIZE];
|
| + UCharIterator iter;
|
| + uint32_t state[2] = {0, 0};
|
| + int32_t keyPartLen;
|
| +
|
| + coll = ucol_open("ja", &status);
|
| + ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
|
| + if (U_FAILURE(status)) {
|
| + log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
|
| + return;
|
| + }
|
| +
|
| + uiter_setString(&iter, testJapaneseName, 5);
|
| + keyPartLen = KEY_PART_SIZE;
|
| + while (keyPartLen == KEY_PART_SIZE) {
|
| + keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
|
| + if (U_FAILURE(status)) {
|
| + log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
|
| + break;
|
| + }
|
| + }
|
| +
|
| + ucol_close(coll);
|
| +}
|
|
|
| #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
|
|
|
| @@ -7131,6 +5848,7 @@ void addMiscCollTest(TestNode** root)
|
| TEST(TestExtremeCompression);
|
| TEST(TestSurrogates);
|
| TEST(TestVariableTopSetting);
|
| + TEST(TestMaxVariable);
|
| TEST(TestBocsuCoverage);
|
| TEST(TestCyrillicTailoring);
|
| TEST(TestCase);
|
| @@ -7138,9 +5856,6 @@ void addMiscCollTest(TestNode** root)
|
| TEST(BlackBirdTest);
|
| TEST(FunkyATest);
|
| TEST(BillFairmanTest);
|
| - TEST(RamsRulesTest);
|
| - TEST(IsTailoredTest);
|
| - TEST(TestCollations);
|
| TEST(TestChMove);
|
| TEST(TestImplicitTailoring);
|
| TEST(TestFCDProblem);
|
| @@ -7149,8 +5864,6 @@ void addMiscCollTest(TestNode** root)
|
| TEST(TestJ815);
|
| /*TEST(TestJ831);*/ /* we changed lv locale */
|
| TEST(TestBefore);
|
| - TEST(TestRedundantRules);
|
| - TEST(TestExpansionSyntax);
|
| TEST(TestHangulTailoring);
|
| TEST(TestUCARules);
|
| TEST(TestIncrementalNormalize);
|
| @@ -7172,7 +5885,6 @@ void addMiscCollTest(TestNode** root)
|
| TEST(TestNumericCollation);
|
| TEST(TestTibetanConformance);
|
| TEST(TestPinyinProblem);
|
| - TEST(TestImplicitGeneration);
|
| TEST(TestSeparateTrees);
|
| TEST(TestBeforePinyin);
|
| TEST(TestBeforeTightening);
|
| @@ -7224,6 +5936,7 @@ void addMiscCollTest(TestNode** root)
|
| TEST(TestReorderWithNumericCollation);
|
|
|
| TEST(TestCaseLevelBufferOverflow);
|
| + TEST(TestNextSortKeyPartJaIdentical);
|
| }
|
|
|
| #endif /* #if !UCONFIG_NO_COLLATION */
|
|
|