Index: source/test/cintltst/cmsccoll.c |
diff --git a/source/test/cintltst/cmsccoll.c b/source/test/cintltst/cmsccoll.c |
index b47472806b246bc3c294b6ed7f7dbe2014a7577b..d20cd9a93aeed7ccf1123ce532201b071b8d4144 100644 |
--- a/source/test/cintltst/cmsccoll.c |
+++ b/source/test/cintltst/cmsccoll.c |
@@ -1,7 +1,7 @@ |
/******************************************************************** |
* COPYRIGHT: |
- * Copyright (c) 2001-2013, International Business Machines Corporation and |
+ * Copyright (c) 2001-2014, International Business Machines Corporation and |
* others. All Rights Reserved. |
********************************************************************/ |
/******************************************************************************* |
@@ -29,7 +29,6 @@ |
#include "unicode/ustring.h" |
#include "string.h" |
#include "ucol_imp.h" |
-#include "ucol_tok.h" |
#include "cmemory.h" |
#include "cstring.h" |
#include "uassert.h" |
@@ -413,1117 +412,6 @@ static void BillFairmanTest(void) { |
} |
} |
-static void testPrimary(UCollator* col, const UChar* p,const UChar* q){ |
- UChar source[256] = { '\0'}; |
- UChar target[256] = { '\0'}; |
- UChar preP = 0x31a3; |
- UChar preQ = 0x310d; |
-/* |
- UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491; |
- UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413; |
-*/ |
- /*log_verbose("Testing primary\n");*/ |
- |
- doTest(col, p, q, UCOL_LESS); |
-/* |
- UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q)); |
- |
- if(result!=UCOL_LESS){ |
- aescstrdup(p,utfSource,256); |
- aescstrdup(q,utfTarget,256); |
- fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget); |
- } |
-*/ |
- source[0] = preP; |
- u_strcpy(source+1,p); |
- target[0] = preQ; |
- u_strcpy(target+1,q); |
- doTest(col, source, target, UCOL_LESS); |
-/* |
- fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget); |
-*/ |
-} |
- |
-static void testSecondary(UCollator* col, const UChar* p,const UChar* q){ |
- UChar source[256] = { '\0'}; |
- UChar target[256] = { '\0'}; |
- |
- /*log_verbose("Testing secondary\n");*/ |
- |
- doTest(col, p, q, UCOL_LESS); |
-/* |
- fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget); |
-*/ |
- source[0] = 0x0053; |
- u_strcpy(source+1,p); |
- target[0]= 0x0073; |
- u_strcpy(target+1,q); |
- |
- doTest(col, source, target, UCOL_LESS); |
-/* |
- fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget); |
-*/ |
- |
- |
- u_strcpy(source,p); |
- source[u_strlen(p)] = 0x62; |
- source[u_strlen(p)+1] = 0; |
- |
- |
- u_strcpy(target,q); |
- target[u_strlen(q)] = 0x61; |
- target[u_strlen(q)+1] = 0; |
- |
- doTest(col, source, target, UCOL_GREATER); |
- |
-/* |
- fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget); |
-*/ |
-} |
- |
-static void testTertiary(UCollator* col, const UChar* p,const UChar* q){ |
- UChar source[256] = { '\0'}; |
- UChar target[256] = { '\0'}; |
- |
- /*log_verbose("Testing tertiary\n");*/ |
- |
- doTest(col, p, q, UCOL_LESS); |
-/* |
- fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget); |
-*/ |
- source[0] = 0x0020; |
- u_strcpy(source+1,p); |
- target[0]= 0x002D; |
- u_strcpy(target+1,q); |
- |
- doTest(col, source, target, UCOL_LESS); |
-/* |
- fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget); |
-*/ |
- |
- u_strcpy(source,p); |
- source[u_strlen(p)] = 0xE0; |
- source[u_strlen(p)+1] = 0; |
- |
- u_strcpy(target,q); |
- target[u_strlen(q)] = 0x61; |
- target[u_strlen(q)+1] = 0; |
- |
- doTest(col, source, target, UCOL_GREATER); |
- |
-/* |
- fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget); |
-*/ |
-} |
- |
-static void testEquality(UCollator* col, const UChar* p,const UChar* q){ |
-/* |
- UChar source[256] = { '\0'}; |
- UChar target[256] = { '\0'}; |
-*/ |
- |
- doTest(col, p, q, UCOL_EQUAL); |
-/* |
- fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget); |
-*/ |
-} |
- |
-static void testCollator(UCollator *coll, UErrorCode *status) { |
- const UChar *rules = NULL, *current = NULL; |
- int32_t ruleLen = 0; |
- uint32_t strength = 0; |
- uint32_t chOffset = 0; uint32_t chLen = 0; |
- uint32_t exOffset = 0; uint32_t exLen = 0; |
- uint32_t prefixOffset = 0; uint32_t prefixLen = 0; |
- uint32_t firstEx = 0; |
-/* uint32_t rExpsLen = 0; */ |
- uint32_t firstLen = 0; |
- UBool varT = FALSE; UBool top_ = TRUE; |
- uint16_t specs = 0; |
- UBool startOfRules = TRUE; |
- UBool lastReset = FALSE; |
- UBool before = FALSE; |
- uint32_t beforeStrength = 0; |
- UColTokenParser src; |
- UColOptionSet opts; |
- |
- UChar first[256]; |
- UChar second[256]; |
- UChar tempB[256]; |
- uint32_t tempLen; |
- UChar *rulesCopy = NULL; |
- UParseError parseError; |
- |
- uprv_memset(&src, 0, sizeof(UColTokenParser)); |
- |
- src.opts = &opts; |
- |
- rules = ucol_getRules(coll, &ruleLen); |
- if(U_SUCCESS(*status) && ruleLen > 0) { |
- rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); |
- uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); |
- src.current = src.source = rulesCopy; |
- src.end = rulesCopy+ruleLen; |
- src.extraCurrent = src.end; |
- src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; |
- *first = *second = 0; |
- |
- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to |
- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ |
- while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) { |
- strength = src.parsedToken.strength; |
- chOffset = src.parsedToken.charsOffset; |
- chLen = src.parsedToken.charsLen; |
- exOffset = src.parsedToken.extensionOffset; |
- exLen = src.parsedToken.extensionLen; |
- prefixOffset = src.parsedToken.prefixOffset; |
- prefixLen = src.parsedToken.prefixLen; |
- specs = src.parsedToken.flags; |
- |
- startOfRules = FALSE; |
- varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); |
- (void)varT; /* Suppress set but not used warning. */ |
- top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); |
- if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */ |
- second[0] = 0; |
- } else { |
- u_strncpy(second,src.source+chOffset, chLen); |
- second[chLen] = 0; |
- |
- if(exLen > 0 && firstEx == 0) { |
- u_strncat(first, src.source+exOffset, exLen); |
- first[firstLen+exLen] = 0; |
- } |
- |
- if(lastReset == TRUE && prefixLen != 0) { |
- u_strncpy(first+prefixLen, first, firstLen); |
- u_strncpy(first, src.source+prefixOffset, prefixLen); |
- first[firstLen+prefixLen] = 0; |
- firstLen = firstLen+prefixLen; |
- } |
- |
- if(before == TRUE) { /* swap first and second */ |
- u_strcpy(tempB, first); |
- u_strcpy(first, second); |
- u_strcpy(second, tempB); |
- |
- tempLen = firstLen; |
- firstLen = chLen; |
- chLen = tempLen; |
- |
- tempLen = firstEx; |
- firstEx = exLen; |
- exLen = tempLen; |
- if(beforeStrength < strength) { |
- strength = beforeStrength; |
- } |
- } |
- } |
- lastReset = FALSE; |
- |
- switch(strength){ |
- case UCOL_IDENTICAL: |
- testEquality(coll,first,second); |
- break; |
- case UCOL_PRIMARY: |
- testPrimary(coll,first,second); |
- break; |
- case UCOL_SECONDARY: |
- testSecondary(coll,first,second); |
- break; |
- case UCOL_TERTIARY: |
- testTertiary(coll,first,second); |
- break; |
- case UCOL_TOK_RESET: |
- lastReset = TRUE; |
- before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); |
- if(before) { |
- beforeStrength = (specs & UCOL_TOK_BEFORE)-1; |
- } |
- break; |
- default: |
- break; |
- } |
- |
- if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */ |
- before = FALSE; |
- } else { |
- firstLen = chLen; |
- firstEx = exLen; |
- u_strcpy(first, second); |
- } |
- } |
- uprv_free(src.source); |
- uprv_free(src.reorderCodes); |
- } |
-} |
- |
-static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) { |
- UCollator *UCA = (UCollator *)collator; |
- return ucol_strcoll(UCA, source, sLen, target, tLen); |
-} |
- |
-/* |
-static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) { |
-#if U_PLATFORM_HAS_WIN32_API |
- LCID lcid = (LCID)collator; |
- return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen); |
-#else |
- return 0; |
-#endif |
-} |
-*/ |
- |
-static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts, |
- UChar s1, UChar s2, |
- const UChar *s, const uint32_t sLen, |
- const UChar *t, const uint32_t tLen) { |
- UChar source[256] = {0}; |
- UChar target[256] = {0}; |
- |
- source[0] = s1; |
- u_strcpy(source+1, s); |
- target[0] = s2; |
- u_strcpy(target+1, t); |
- |
- return func(collator, opts, source, sLen+1, target, tLen+1); |
-} |
- |
-static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts, |
- UChar s1, UChar s2, |
- const UChar *s, const uint32_t sLen, |
- const UChar *t, const uint32_t tLen) { |
- UChar source[256] = {0}; |
- UChar target[256] = {0}; |
- |
- u_strcpy(source, s); |
- source[sLen] = s1; |
- u_strcpy(target, t); |
- target[tLen] = s2; |
- |
- return func(collator, opts, source, sLen+1, target, tLen+1); |
-} |
- |
-static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts, |
- const UChar *s, const uint32_t sLen, |
- const UChar *t, const uint32_t tLen, |
- UCollationResult result) { |
- /*UChar fPrimary = 0x6d;*/ |
- /*UChar sPrimary = 0x6e;*/ |
- UChar fSecondary = 0x310d; |
- UChar sSecondary = 0x31a3; |
- UChar fTertiary = 0x310f; |
- UChar sTertiary = 0x31b7; |
- |
- UCollationResult oposite; |
- if(result == UCOL_EQUAL) { |
- return UCOL_IDENTICAL; |
- } else if(result == UCOL_GREATER) { |
- oposite = UCOL_LESS; |
- } else { |
- oposite = UCOL_GREATER; |
- } |
- |
- if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) { |
- return UCOL_PRIMARY; |
- } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) && |
- (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) { |
- return UCOL_SECONDARY; |
- } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) && |
- (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) { |
- return UCOL_TERTIARY; |
- } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) && |
- (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) { |
- return UCOL_QUATERNARY; |
- } else { |
- return UCOL_IDENTICAL; |
- } |
-} |
- |
-static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) { |
- uint32_t i = 0; |
- |
- if(res == UCOL_EQUAL || strength == 0xdeadbeef) { |
- buffer[0] = '='; |
- buffer[1] = '='; |
- buffer[2] = '\0'; |
- } else if(res == UCOL_GREATER) { |
- for(i = 0; i<strength+1; i++) { |
- buffer[i] = '>'; |
- } |
- buffer[strength+1] = '\0'; |
- } else { |
- for(i = 0; i<strength+1; i++) { |
- buffer[i] = '<'; |
- } |
- buffer[strength+1] = '\0'; |
- } |
- |
- return buffer; |
-} |
- |
- |
- |
-static void logFailure (const char *platform, const char *test, |
- const UChar *source, const uint32_t sLen, |
- const UChar *target, const uint32_t tLen, |
- UCollationResult realRes, uint32_t realStrength, |
- UCollationResult expRes, uint32_t expStrength, UBool error) { |
- |
- uint32_t i = 0; |
- |
- char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256]; |
- static int32_t maxOutputLength = 0; |
- int32_t outputLength; |
- |
- *sEsc = *tEsc = *s = *t = 0; |
- if(error == TRUE) { |
- log_err("Difference between expected and generated order. Run test with -v for more info\n"); |
- } else if(getTestOption(VERBOSITY_OPTION) == 0) { |
- return; |
- } |
- for(i = 0; i<sLen; i++) { |
- sprintf(b, "%04X", source[i]); |
- strcat(sEsc, "\\u"); |
- strcat(sEsc, b); |
- strcat(s, b); |
- strcat(s, " "); |
- if(source[i] < 0x80) { |
- sprintf(b, "(%c)", source[i]); |
- strcat(sEsc, b); |
- } |
- } |
- for(i = 0; i<tLen; i++) { |
- sprintf(b, "%04X", target[i]); |
- strcat(tEsc, "\\u"); |
- strcat(tEsc, b); |
- strcat(t, b); |
- strcat(t, " "); |
- if(target[i] < 0x80) { |
- sprintf(b, "(%c)", target[i]); |
- strcat(tEsc, b); |
- } |
- } |
-/* |
- strcpy(output, "[[ "); |
- strcat(output, sEsc); |
- strcat(output, getRelationSymbol(expRes, expStrength, relation)); |
- strcat(output, tEsc); |
- |
- strcat(output, " : "); |
- |
- strcat(output, sEsc); |
- strcat(output, getRelationSymbol(realRes, realStrength, relation)); |
- strcat(output, tEsc); |
- strcat(output, " ]] "); |
- |
- log_verbose("%s", output); |
-*/ |
- |
- |
- strcpy(output, "DIFF: "); |
- |
- strcat(output, s); |
- strcat(output, " : "); |
- strcat(output, t); |
- |
- strcat(output, test); |
- strcat(output, ": "); |
- |
- strcat(output, sEsc); |
- strcat(output, getRelationSymbol(expRes, expStrength, relation)); |
- strcat(output, tEsc); |
- |
- strcat(output, " "); |
- |
- strcat(output, platform); |
- strcat(output, ": "); |
- |
- strcat(output, sEsc); |
- strcat(output, getRelationSymbol(realRes, realStrength, relation)); |
- strcat(output, tEsc); |
- |
- outputLength = (int32_t)strlen(output); |
- if(outputLength > maxOutputLength) { |
- maxOutputLength = outputLength; |
- U_ASSERT(outputLength < sizeof(output)); |
- } |
- |
- log_verbose("%s\n", output); |
- |
-} |
- |
-/* |
-static void printOutRules(const UChar *rules) { |
- uint32_t len = u_strlen(rules); |
- uint32_t i = 0; |
- char toPrint; |
- uint32_t line = 0; |
- |
- fprintf(stdout, "Rules:"); |
- |
- for(i = 0; i<len; i++) { |
- if(rules[i]<0x7f && rules[i]>=0x20) { |
- toPrint = (char)rules[i]; |
- if(toPrint == '&') { |
- line = 1; |
- fprintf(stdout, "\n&"); |
- } else if(toPrint == ';') { |
- fprintf(stdout, "<<"); |
- line+=2; |
- } else if(toPrint == ',') { |
- fprintf(stdout, "<<<"); |
- line+=3; |
- } else { |
- fprintf(stdout, "%c", toPrint); |
- line++; |
- } |
- } else if(rules[i]<0x3400 || rules[i]>=0xa000) { |
- fprintf(stdout, "\\u%04X", rules[i]); |
- line+=6; |
- } |
- if(line>72) { |
- fprintf(stdout, "\n"); |
- line = 0; |
- } |
- } |
- |
- log_verbose("\n"); |
- |
-} |
-*/ |
- |
-static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) { |
- uint32_t diffs = 0; |
- UCollationResult realResult; |
- uint32_t realStrength; |
- |
- uint32_t sLen = u_strlen(first); |
- uint32_t tLen = u_strlen(second); |
- |
- realResult = func(collator, opts, first, sLen, second, tLen); |
- realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult); |
- |
- if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) { |
- logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error); |
- diffs++; |
- } else if(realResult != UCOL_LESS || realStrength != strength) { |
- logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error); |
- diffs++; |
- } |
- return diffs; |
-} |
- |
- |
-static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) { |
- const UChar *rules = NULL, *current = NULL; |
- int32_t ruleLen = 0; |
- uint32_t strength = 0; |
- uint32_t chOffset = 0; uint32_t chLen = 0; |
- uint32_t exOffset = 0; uint32_t exLen = 0; |
- uint32_t prefixOffset = 0; uint32_t prefixLen = 0; |
-/* uint32_t rExpsLen = 0; */ |
- uint32_t firstLen = 0, secondLen = 0; |
- UBool varT = FALSE; UBool top_ = TRUE; |
- uint16_t specs = 0; |
- UBool startOfRules = TRUE; |
- UColTokenParser src; |
- UColOptionSet opts; |
- |
- UChar first[256]; |
- UChar second[256]; |
- UChar *rulesCopy = NULL; |
- |
- uint32_t UCAdiff = 0; |
- uint32_t Windiff = 1; |
- UParseError parseError; |
- |
- (void)top_; /* Suppress set but not used warnings. */ |
- (void)varT; |
- (void)secondLen; |
- (void)prefixLen; |
- (void)prefixOffset; |
- |
- uprv_memset(&src, 0, sizeof(UColTokenParser)); |
- src.opts = &opts; |
- |
- rules = ucol_getRules(coll, &ruleLen); |
- |
- /*printOutRules(rules);*/ |
- |
- if(U_SUCCESS(*status) && ruleLen > 0) { |
- rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); |
- uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); |
- src.current = src.source = rulesCopy; |
- src.end = rulesCopy+ruleLen; |
- src.extraCurrent = src.end; |
- src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; |
- *first = *second = 0; |
- |
- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to |
- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ |
- while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) { |
- strength = src.parsedToken.strength; |
- chOffset = src.parsedToken.charsOffset; |
- chLen = src.parsedToken.charsLen; |
- exOffset = src.parsedToken.extensionOffset; |
- exLen = src.parsedToken.extensionLen; |
- prefixOffset = src.parsedToken.prefixOffset; |
- prefixLen = src.parsedToken.prefixLen; |
- specs = src.parsedToken.flags; |
- |
- startOfRules = FALSE; |
- varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); |
- top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); |
- |
- u_strncpy(second,src.source+chOffset, chLen); |
- second[chLen] = 0; |
- secondLen = chLen; |
- |
- if(exLen > 0) { |
- u_strncat(first, src.source+exOffset, exLen); |
- first[firstLen+exLen] = 0; |
- firstLen += exLen; |
- } |
- |
- if(strength != UCOL_TOK_RESET) { |
- if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) { |
- UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error); |
- /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/ |
- } |
- } |
- |
- |
- firstLen = chLen; |
- u_strcpy(first, second); |
- |
- } |
- if(UCAdiff != 0 && Windiff != 0) { |
- log_verbose("\n"); |
- } |
- if(UCAdiff == 0) { |
- log_verbose("No immediate difference with %s!\n", refName); |
- } |
- if(Windiff == 0) { |
- log_verbose("No immediate difference with Win32!\n"); |
- } |
- uprv_free(src.source); |
- uprv_free(src.reorderCodes); |
- } |
-} |
- |
-/* |
- * Takes two CEs (lead and continuation) and |
- * compares them as CEs should be compared: |
- * primary vs. primary, secondary vs. secondary |
- * tertiary vs. tertiary |
- */ |
-static int32_t compareCEs(uint32_t s1, uint32_t s2, |
- uint32_t t1, uint32_t t2) { |
- uint32_t s = 0, t = 0; |
- if(s1 == t1 && s2 == t2) { |
- return 0; |
- } |
- s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16); |
- t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16); |
- if(s < t) { |
- return -1; |
- } else if(s > t) { |
- return 1; |
- } else { |
- s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8; |
- t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8; |
- if(s < t) { |
- return -1; |
- } else if(s > t) { |
- return 1; |
- } else { |
- s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF); |
- t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF); |
- if(s < t) { |
- return -1; |
- } else { |
- return 1; |
- } |
- } |
- } |
-} |
- |
-typedef struct { |
- uint32_t startCE; |
- uint32_t startContCE; |
- uint32_t limitCE; |
- uint32_t limitContCE; |
-} indirectBoundaries; |
- |
-/* these values are used for finding CE values for indirect positioning. */ |
-/* Indirect positioning is a mechanism for allowing resets on symbolic */ |
-/* values. It only works for resets and you cannot tailor indirect names */ |
-/* An indirect name can define either an anchor point or a range. An */ |
-/* anchor point behaves in exactly the same way as a code point in reset */ |
-/* would, except that it cannot be tailored. A range (we currently only */ |
-/* know for the [top] range will explicitly set the upper bound for */ |
-/* generated CEs, thus allowing for better control over how many CEs can */ |
-/* be squeezed between in the range without performance penalty. */ |
-/* In that respect, we use [top] for tailoring of locales that use CJK */ |
-/* characters. Other indirect values are currently a pure convenience, */ |
-/* they can be used to assure that the CEs will be always positioned in */ |
-/* the same place relative to a point with known properties (e.g. first */ |
-/* primary ignorable). */ |
-static indirectBoundaries ucolIndirectBoundaries[15]; |
-static UBool indirectBoundariesSet = FALSE; |
-static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) { |
- /* Set values for the top - TODO: once we have values for all the indirects, we are going */ |
- /* to initalize here. */ |
- ucolIndirectBoundaries[indexR].startCE = start[0]; |
- ucolIndirectBoundaries[indexR].startContCE = start[1]; |
- if(end) { |
- ucolIndirectBoundaries[indexR].limitCE = end[0]; |
- ucolIndirectBoundaries[indexR].limitContCE = end[1]; |
- } else { |
- ucolIndirectBoundaries[indexR].limitCE = 0; |
- ucolIndirectBoundaries[indexR].limitContCE = 0; |
- } |
-} |
- |
-static void testCEs(UCollator *coll, UErrorCode *status) { |
- const UChar *rules = NULL, *current = NULL; |
- int32_t ruleLen = 0; |
- |
- uint32_t strength = 0; |
- uint32_t maxStrength = UCOL_IDENTICAL; |
- uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE; |
- uint32_t lastCE; |
- uint32_t lastContCE; |
- |
- int32_t result = 0; |
- uint32_t chOffset = 0; uint32_t chLen = 0; |
- uint32_t exOffset = 0; uint32_t exLen = 0; |
- uint32_t prefixOffset = 0; uint32_t prefixLen = 0; |
- uint32_t oldOffset = 0; |
- |
- /* uint32_t rExpsLen = 0; */ |
- /* uint32_t firstLen = 0; */ |
- uint16_t specs = 0; |
- UBool varT = FALSE; UBool top_ = TRUE; |
- UBool startOfRules = TRUE; |
- UBool before = FALSE; |
- UColTokenParser src; |
- UColOptionSet opts; |
- UParseError parseError; |
- UChar *rulesCopy = NULL; |
- collIterate *c = uprv_new_collIterate(status); |
- UCAConstants *consts = NULL; |
- uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */ |
- UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT; |
- const char *colLoc; |
- UCollator *UCA = ucol_open("root", status); |
- |
- (void)varT; /* Suppress set but not used warnings. */ |
- (void)prefixLen; |
- (void)prefixOffset; |
- (void)exLen; |
- (void)exOffset; |
- |
- if (U_FAILURE(*status)) { |
- log_err("Could not open root collator %s\n", u_errorName(*status)); |
- uprv_delete_collIterate(c); |
- return; |
- } |
- |
- colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status); |
- if (U_FAILURE(*status)) { |
- log_err("Could not get collator name: %s\n", u_errorName(*status)); |
- ucol_close(UCA); |
- uprv_delete_collIterate(c); |
- return; |
- } |
- |
- uprv_memset(&src, 0, sizeof(UColTokenParser)); |
- |
- consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts); |
- UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0]; |
- /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */ |
- UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0]; |
- UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1]; |
- |
- baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND; |
- |
- src.opts = &opts; |
- |
- rules = ucol_getRules(coll, &ruleLen); |
- |
- src.invUCA = ucol_initInverseUCA(status); |
- |
- if(indirectBoundariesSet == FALSE) { |
- /* UCOL_RESET_TOP_VALUE */ |
- setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT); |
- /* UCOL_FIRST_PRIMARY_IGNORABLE */ |
- setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0); |
- /* UCOL_LAST_PRIMARY_IGNORABLE */ |
- setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0); |
- /* UCOL_FIRST_SECONDARY_IGNORABLE */ |
- setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0); |
- /* UCOL_LAST_SECONDARY_IGNORABLE */ |
- setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0); |
- /* UCOL_FIRST_TERTIARY_IGNORABLE */ |
- setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0); |
- /* UCOL_LAST_TERTIARY_IGNORABLE */ |
- setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0); |
- /* UCOL_FIRST_VARIABLE */ |
- setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0); |
- /* UCOL_LAST_VARIABLE */ |
- setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0); |
- /* UCOL_FIRST_NON_VARIABLE */ |
- setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0); |
- /* UCOL_LAST_NON_VARIABLE */ |
- setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT); |
- /* UCOL_FIRST_IMPLICIT */ |
- setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0); |
- /* UCOL_LAST_IMPLICIT */ |
- setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING); |
- /* UCOL_FIRST_TRAILING */ |
- setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0); |
- /* UCOL_LAST_TRAILING */ |
- setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0); |
- ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24); |
- indirectBoundariesSet = TRUE; |
- } |
- |
- |
- if(U_SUCCESS(*status) && ruleLen > 0) { |
- rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); |
- uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); |
- src.current = src.source = rulesCopy; |
- src.end = rulesCopy+ruleLen; |
- src.extraCurrent = src.end; |
- src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; |
- |
- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to |
- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ |
- while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) { |
- strength = src.parsedToken.strength; |
- chOffset = src.parsedToken.charsOffset; |
- chLen = src.parsedToken.charsLen; |
- exOffset = src.parsedToken.extensionOffset; |
- exLen = src.parsedToken.extensionLen; |
- prefixOffset = src.parsedToken.prefixOffset; |
- prefixLen = src.parsedToken.prefixLen; |
- specs = src.parsedToken.flags; |
- |
- startOfRules = FALSE; |
- varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); |
- top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); |
- |
- uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status); |
- |
- currCE = ucol_getNextCE(coll, c, status); |
- if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) { |
- log_verbose("Thai prevowel detected. Will pick next CE\n"); |
- currCE = ucol_getNextCE(coll, c, status); |
- } |
- |
- currContCE = ucol_getNextCE(coll, c, status); |
- if(!isContinuation(currContCE)) { |
- currContCE = 0; |
- } |
- |
- /* we need to repack CEs here */ |
- |
- if(strength == UCOL_TOK_RESET) { |
- before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); |
- if(top_ == TRUE) { |
- int32_t tokenIndex = src.parsedToken.indirectIndex; |
- |
- nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE; |
- nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE; |
- } else { |
- nextCE = baseCE = currCE; |
- nextContCE = baseContCE = currContCE; |
- } |
- maxStrength = UCOL_IDENTICAL; |
- } else { |
- if(strength < maxStrength) { |
- maxStrength = strength; |
- if(baseCE == UCOL_RESET_TOP_VALUE) { |
- log_verbose("Resetting to [top]\n"); |
- nextCE = UCOL_NEXT_TOP_VALUE; |
- nextContCE = UCOL_NEXT_TOP_CONT; |
- } else { |
- result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength); |
- } |
- if(result < 0) { |
- if(ucol_isTailored(coll, *(src.source+oldOffset), status)) { |
- log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset)); |
- return; |
- } else { |
- log_err("%s: couldn't find the CE\n", colLoc); |
- return; |
- } |
- } |
- } |
- |
- currCE &= 0xFFFFFF3F; |
- currContCE &= 0xFFFFFFBF; |
- |
- if(maxStrength == UCOL_IDENTICAL) { |
- if(baseCE != currCE || baseContCE != currContCE) { |
- log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc); |
- } |
- } else { |
- if(strength == UCOL_IDENTICAL) { |
- if(lastCE != currCE || lastContCE != currContCE) { |
- log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc); |
- } |
- } else { |
- if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) { |
- /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/ |
- log_err("%s: current CE is not less than base CE\n", colLoc); |
- } |
- if(!before) { |
- if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) { |
- /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/ |
- log_err("%s: sequence of generated CEs is broken\n", colLoc); |
- } |
- } else { |
- before = FALSE; |
- if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) { |
- /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/ |
- log_err("%s: sequence of generated CEs is broken\n", colLoc); |
- } |
- } |
- } |
- } |
- |
- } |
- |
- oldOffset = chOffset; |
- lastCE = currCE & 0xFFFFFF3F; |
- lastContCE = currContCE & 0xFFFFFFBF; |
- } |
- uprv_free(src.source); |
- uprv_free(src.reorderCodes); |
- } |
- ucol_close(UCA); |
- uprv_delete_collIterate(c); |
-} |
- |
-#if 0 |
-/* these locales are now picked from index RB */ |
-static const char* localesToTest[] = { |
-"ar", "bg", "ca", "cs", "da", |
-"el", "en_BE", "en_US_POSIX", |
-"es", "et", "fi", "fr", "hi", |
-"hr", "hu", "is", "iw", "ja", |
-"ko", "lt", "lv", "mk", "mt", |
-"nb", "nn", "nn_NO", "pl", "ro", |
-"ru", "sh", "sk", "sl", "sq", |
-"sr", "sv", "th", "tr", "uk", |
-"vi", "zh", "zh_TW" |
-}; |
-#endif |
- |
-static const char* rulesToTest[] = { |
- /* Funky fa rule */ |
- "&\\u0622 < \\u0627 << \\u0671 < \\u0621", |
- /*"& Z < p, P",*/ |
- /* Cui Mins rules */ |
- "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/ |
- "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/ |
- "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/ |
- "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/ |
- "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/ |
- "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/ |
- "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/ |
-}; |
- |
- |
-static void TestCollations(void) { |
- int32_t noOfLoc = uloc_countAvailable(); |
- int32_t i = 0, j = 0; |
- |
- UErrorCode status = U_ZERO_ERROR; |
- char cName[256]; |
- UChar name[256]; |
- int32_t nameSize; |
- |
- |
- const char *locName = NULL; |
- UCollator *coll = NULL; |
- UCollator *UCA = ucol_open("", &status); |
- UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status); |
- if (U_FAILURE(status)) { |
- log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status)); |
- return; |
- } |
- ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status); |
- |
- for(i = 0; i<noOfLoc; i++) { |
- status = U_ZERO_ERROR; |
- locName = uloc_getAvailable(i); |
- if(uprv_strcmp("ja", locName) == 0) { |
- log_verbose("Don't know how to test prefixes\n"); |
- continue; |
- } |
- if(hasCollationElements(locName)) { |
- nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status); |
- for(j = 0; j<nameSize; j++) { |
- cName[j] = (char)name[j]; |
- } |
- cName[nameSize] = 0; |
- log_verbose("\nTesting locale %s (%s)\n", locName, cName); |
- coll = ucol_open(locName, &status); |
- if(U_SUCCESS(status)) { |
- testAgainstUCA(coll, UCA, "UCA", FALSE, &status); |
- ucol_close(coll); |
- } else { |
- log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status)); |
- status = U_ZERO_ERROR; |
- } |
- } |
- } |
- ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status); |
- ucol_close(UCA); |
-} |
- |
-static void RamsRulesTest(void) { |
- UErrorCode status = U_ZERO_ERROR; |
- int32_t i = 0; |
- UCollator *coll = NULL; |
- UChar rule[2048]; |
- uint32_t ruleLen; |
- int32_t noOfLoc = uloc_countAvailable(); |
- const char *locName = NULL; |
- |
- log_verbose("RamsRulesTest\n"); |
- |
- if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) { |
- /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */ |
- return; |
- } |
- |
- for(i = 0; i<noOfLoc; i++) { |
- locName = uloc_getAvailable(i); |
- if(hasCollationElements(locName)) { |
- if (uprv_strcmp("ja", locName)==0) { |
- log_verbose("Don't know how to test Japanese because of prefixes\n"); |
- continue; |
- } |
- if (uprv_strcmp("de__PHONEBOOK", locName)==0) { |
- log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n"); |
- continue; |
- } |
- if (uprv_strcmp("bn", locName)==0 || |
- uprv_strcmp("bs", locName)==0 || /* Add due to import per cldrbug 5647 */ |
- uprv_strcmp("bs_Cyrl", locName)==0 || /* Add due to import per cldrbug 5647 */ |
- uprv_strcmp("en_US_POSIX", locName)==0 || |
- uprv_strcmp("fa", locName)==0 || /* Add in #10222 with CLDR 24 integration */ |
- uprv_strcmp("fa_AF", locName)==0 || /* Add due to import per cldrbug 5647 */ |
- uprv_strcmp("gl", locName)==0 || /* Add due to import per cldrbug 5647 */ |
- uprv_strcmp("gl_ES", locName)==0 || /* Add due to import per cldrbug 5647 */ |
- uprv_strcmp("he", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */ |
- uprv_strcmp("he_IL", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */ |
- uprv_strcmp("km", locName)==0 || |
- uprv_strcmp("km_KH", locName)==0 || |
- uprv_strcmp("my", locName)==0 || |
- uprv_strcmp("ps", locName)==0 || /* Add in #10222 with CLDR 24 integration */ |
- uprv_strcmp("si", locName)==0 || |
- uprv_strcmp("si_LK", locName)==0 || |
- uprv_strcmp("sr_Latn", locName)==0 || /* Add due to import per cldrbug 5647 */ |
- uprv_strcmp("th", locName)==0 || |
- uprv_strcmp("th_TH", locName)==0 || |
- uprv_strcmp("zh", locName)==0 || |
- uprv_strcmp("zh_Hant", locName)==0 |
- ) { |
- if(log_knownIssue("6040", NULL)) { |
- log_verbose("Can't test %s - TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName); |
- continue; |
- } |
- } |
- log_verbose("Testing locale %s\n", locName); |
- status = U_ZERO_ERROR; |
- coll = ucol_open(locName, &status); |
- if(U_SUCCESS(status)) { |
- if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) { |
- if(coll->image->jamoSpecial == TRUE) { |
- log_err("%s has special JAMOs\n", locName); |
- } |
- ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status); |
- testCollator(coll, &status); |
- testCEs(coll, &status); |
- } else { |
- log_verbose("Skipping %s: %s\n", locName, u_errorName(status)); |
- } |
- ucol_close(coll); |
- } else { |
- log_err("Could not open %s: %s\n", locName, u_errorName(status)); |
- } |
- } |
- } |
- |
- for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) { |
- log_verbose("Testing rule: %s\n", rulesToTest[i]); |
- ruleLen = u_unescape(rulesToTest[i], rule, 2048); |
- status = U_ZERO_ERROR; |
- coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); |
- if(U_SUCCESS(status)) { |
- testCollator(coll, &status); |
- testCEs(coll, &status); |
- ucol_close(coll); |
- } else { |
- log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]); |
- } |
- } |
- |
-} |
- |
-static void IsTailoredTest(void) { |
- UErrorCode status = U_ZERO_ERROR; |
- uint32_t i = 0; |
- UCollator *coll = NULL; |
- UChar rule[2048]; |
- UChar tailored[2048]; |
- UChar notTailored[2048]; |
- uint32_t ruleLen, tailoredLen, notTailoredLen; |
- |
- log_verbose("IsTailoredTest\n"); |
- |
- u_uastrcpy(rule, "&Z < A, B, C;c < d"); |
- ruleLen = u_strlen(rule); |
- |
- u_uastrcpy(tailored, "ABCcd"); |
- tailoredLen = u_strlen(tailored); |
- |
- u_uastrcpy(notTailored, "ZabD"); |
- notTailoredLen = u_strlen(notTailored); |
- |
- coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); |
- if(U_SUCCESS(status)) { |
- for(i = 0; i<tailoredLen; i++) { |
- if(!ucol_isTailored(coll, tailored[i], &status)) { |
- log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]); |
- } |
- } |
- for(i = 0; i<notTailoredLen; i++) { |
- if(ucol_isTailored(coll, notTailored[i], &status)) { |
- log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]); |
- } |
- } |
- ucol_close(coll); |
- } |
- else { |
- log_err_status(status, "Can't tailor rules\n"); |
- } |
- /* Code coverage */ |
- status = U_ZERO_ERROR; |
- coll = ucol_open("ja", &status); |
- if(!ucol_isTailored(coll, 0x4E9C, &status)) { |
- log_err_status(status, "0x4E9C should be tailored - it is reported as not\n"); |
- } |
- ucol_close(coll); |
-} |
- |
- |
const static char chTest[][20] = { |
"c", |
"C", |
@@ -1573,6 +461,7 @@ static void TestChMove(void) { |
+/* |
const static char impTest[][20] = { |
"\\u4e00", |
"a", |
@@ -1581,6 +470,7 @@ const static char impTest[][20] = { |
"B", |
"\\u4e01" |
}; |
+*/ |
static void TestImplicitTailoring(void) { |
@@ -1589,7 +479,12 @@ static void TestImplicitTailoring(void) { |
const char *data[10]; |
const uint32_t len; |
} tests[] = { |
- { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 }, |
+ { |
+ /* Tailor b and c before U+4E00. */ |
+ "&[before 1]\\u4e00 < b < c " |
+ /* Now, before U+4E00 is c; put d and e after that. */ |
+ "&[before 1]\\u4e00 < d < e", |
+ { "b", "c", "d", "e", "\\u4e00"}, 5 }, |
{ "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 }, |
{ "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3}, |
{ "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3} |
@@ -1934,205 +829,6 @@ static void TestJ815(void) { |
} |
-/* |
-"& a < b < c < d& r < c", "& a < b < d& r < c", |
-"& a < b < c < d& c < m", "& a < b < c < m < d", |
-"& a < b < c < d& a < m", "& a < m < b < c < d", |
-"& a <<< b << c < d& a < m", "& a <<< b << c < m < d", |
-"& a < b < c < d& [before 1] c < m", "& a < b < m < c < d", |
-"& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e", |
-"& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e", |
-"& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e", |
-"& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g", |
-*/ |
-static void TestRedundantRules(void) { |
- int32_t i; |
- |
- static const struct { |
- const char *rules; |
- const char *expectedRules; |
- const char *testdata[8]; |
- uint32_t testdatalen; |
- } tests[] = { |
- /* this test conflicts with positioning of CODAN placeholder */ |
- /*{ |
- "& a <<< b <<< c << d <<< e& [before 1] e <<< x", |
- "&\\u2089<<<x", |
- {"\\u2089", "x"}, 2 |
- }, */ |
- /* this test conflicts with the [before x] syntax tightening */ |
- /*{ |
- "& b <<< c <<< d << e <<< f& [before 1] f <<< x", |
- "&\\u0252<<<x", |
- {"\\u0252", "x"}, 2 |
- }, */ |
- /* this test conflicts with the [before x] syntax tightening */ |
- /*{ |
- "& a < b <<< c << d <<< e& [before 1] e <<< x", |
- "& a <<< x < b <<< c << d <<< e", |
- {"a", "x", "b", "c", "d", "e"}, 6 |
- }, */ |
- { |
- "& a < b < c < d& [before 1] c < m", |
- "& a < b < m < c < d", |
- {"a", "b", "m", "c", "d"}, 5 |
- }, |
- { |
- "& a < b <<< c << d <<< e& [before 3] e <<< x", |
- "& a < b <<< c << d <<< x <<< e", |
- {"a", "b", "c", "d", "x", "e"}, 6 |
- }, |
- /* this test conflicts with the [before x] syntax tightening */ |
- /* { |
- "& a < b <<< c << d <<< e& [before 2] e <<< x", |
- "& a < b <<< c <<< x << d <<< e", |
- {"a", "b", "c", "x", "d", "e"},, 6 |
- }, */ |
- { |
- "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", |
- "& a < b <<< c << d <<< e <<< f < x < g", |
- {"a", "b", "c", "d", "e", "f", "x", "g"}, 8 |
- }, |
- { |
- "& a <<< b << c < d& a < m", |
- "& a <<< b << c < m < d", |
- {"a", "b", "c", "m", "d"}, 5 |
- }, |
- { |
- "&a<b<<b\\u0301 &z<b", |
- "&a<b\\u0301 &z<b", |
- {"a", "b\\u0301", "z", "b"}, 4 |
- }, |
- { |
- "&z<m<<<q<<<m", |
- "&z<q<<<m", |
- {"z", "q", "m"},3 |
- }, |
- { |
- "&z<<<m<q<<<m", |
- "&z<q<<<m", |
- {"z", "q", "m"}, 3 |
- }, |
- { |
- "& a < b < c < d& r < c", |
- "& a < b < d& r < c", |
- {"a", "b", "d"}, 3 |
- }, |
- { |
- "& a < b < c < d& r < c", |
- "& a < b < d& r < c", |
- {"r", "c"}, 2 |
- }, |
- { |
- "& a < b < c < d& c < m", |
- "& a < b < c < m < d", |
- {"a", "b", "c", "m", "d"}, 5 |
- }, |
- { |
- "& a < b < c < d& a < m", |
- "& a < m < b < c < d", |
- {"a", "m", "b", "c", "d"}, 5 |
- } |
- }; |
- |
- |
- UCollator *credundant = NULL; |
- UCollator *cresulting = NULL; |
- UErrorCode status = U_ZERO_ERROR; |
- UChar rlz[2048] = { 0 }; |
- uint32_t rlen = 0; |
- |
- for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) { |
- log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules); |
- rlen = u_unescape(tests[i].rules, rlz, 2048); |
- |
- credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); |
- if(status == U_FILE_ACCESS_ERROR) { |
- log_data_err("Is your data around?\n"); |
- return; |
- } else if(U_FAILURE(status)) { |
- log_err("Error opening collator\n"); |
- return; |
- } |
- |
- rlen = u_unescape(tests[i].expectedRules, rlz, 2048); |
- cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); |
- |
- testAgainstUCA(cresulting, credundant, "expected", TRUE, &status); |
- |
- ucol_close(credundant); |
- ucol_close(cresulting); |
- |
- log_verbose("testing using data\n"); |
- |
- genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen); |
- } |
- |
-} |
- |
-static void TestExpansionSyntax(void) { |
- int32_t i; |
- |
- const static char *rules[] = { |
- "&AE <<< a << b <<< c &d <<< f", |
- "&AE <<< a <<< b << c << d < e < f <<< g", |
- "&AE <<< B <<< C / D <<< F" |
- }; |
- |
- const static char *expectedRules[] = { |
- "&A <<< a / E << b / E <<< c /E &d <<< f", |
- "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g", |
- "&A <<< B / E <<< C / ED <<< F / E" |
- }; |
- |
- const static char *testdata[][8] = { |
- {"AE", "a", "b", "c"}, |
- {"AE", "a", "b", "c", "d", "e", "f", "g"}, |
- {"AE", "B", "C"} /* / ED <<< F / E"},*/ |
- }; |
- |
- const static uint32_t testdatalen[] = { |
- 4, |
- 8, |
- 3 |
- }; |
- |
- |
- |
- UCollator *credundant = NULL; |
- UCollator *cresulting = NULL; |
- UErrorCode status = U_ZERO_ERROR; |
- UChar rlz[2048] = { 0 }; |
- uint32_t rlen = 0; |
- |
- for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) { |
- log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]); |
- rlen = u_unescape(rules[i], rlz, 2048); |
- |
- credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); |
- if(status == U_FILE_ACCESS_ERROR) { |
- log_data_err("Is your data around?\n"); |
- return; |
- } else if(U_FAILURE(status)) { |
- log_err("Error opening collator\n"); |
- return; |
- } |
- rlen = u_unescape(expectedRules[i], rlz, 2048); |
- cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); |
- |
- /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */ |
- /* as a hard error test, but only in information mode */ |
- testAgainstUCA(cresulting, credundant, "expected", FALSE, &status); |
- |
- ucol_close(credundant); |
- ucol_close(cresulting); |
- |
- log_verbose("testing using data\n"); |
- |
- genericRulesStarter(rules[i], testdata[i], testdatalen[i]); |
- } |
-} |
- |
static void TestCase(void) |
{ |
const static UChar gRules[MAX_TOKEN_LEN] = |
@@ -2246,13 +942,13 @@ static void TestCase(void) |
}; |
log_verbose("mixed case test\n"); |
log_verbose("lower first, case level off\n"); |
- genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); |
+ genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); |
log_verbose("upper first, case level off\n"); |
- genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); |
+ genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); |
log_verbose("lower first, case level on\n"); |
- genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); |
+ genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); |
log_verbose("upper first, case level on\n"); |
- genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); |
+ genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); |
} |
} |
@@ -2560,25 +1256,32 @@ static void TestHangulTailoring(void) { |
log_err("Unable to open collator with rules %s\n", rules); |
} |
- log_verbose("Setting jamoSpecial to TRUE and testing once more\n"); |
- ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */ |
- genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0])); |
- |
ucol_close(coll); |
log_verbose("Using ko__LOTUS locale\n"); |
genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0])); |
} |
+/* |
+ * Middle bytes of the secondary/tertiary sort key compression byte ranges, |
+ * as used by the current implementation (see class CollationKeys); subject to |
+ * change as the sort key compression changes. TestCompressOverlap reports top |
+ * down bytes below, and bottom up bytes above, these values as overlapping. |
+ */ |
+enum { |
+ SEC_COMMON_MIDDLE = 0x25, /* secondary: midpoint of byte range 05..45 */ |
+ TER_ONLY_COMMON_MIDDLE = 0x65 /* tertiary: midpoint of byte range 05..C5 */ |
+}; |
+ |
static void TestCompressOverlap(void) { |
UChar secstr[150]; |
UChar tertstr[150]; |
UErrorCode status = U_ZERO_ERROR; |
UCollator *coll; |
- char result[200]; |
+ uint8_t result[500]; |
uint32_t resultlen; |
int count = 0; |
- char *tempptr; |
+ uint8_t *tempptr; |
coll = ucol_open("", &status); |
@@ -2598,29 +1301,29 @@ static void TestCompressOverlap(void) { |
/* no compression secstr should have 150 secondary bytes, tertstr should |
have 150 tertiary bytes. |
- with correct overlapping compression, secstr should have 4 secondary |
- bytes, tertstr should have > 2 tertiary bytes */ |
- resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); |
+ with correct compression, secstr should have 6 secondary |
+ bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */ |
+ resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result)); |
(void)resultlen; /* Suppress set but not used warning. */ |
- tempptr = uprv_strchr(result, 1) + 1; |
+ tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1; |
while (*(tempptr + 1) != 1) { |
/* the last secondary collation element is not checked since it is not |
part of the compression */ |
- if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) { |
- log_err("Secondary compression overlapped\n"); |
+ if (*tempptr < SEC_COMMON_MIDDLE) { |
+ log_err("Secondary top down compression overlapped\n"); |
} |
tempptr ++; |
} |
/* tertiary top/bottom/common for en_US is similar to the secondary |
top/bottom/common */ |
- resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); |
- tempptr = uprv_strrchr(result, 1) + 1; |
+ resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result)); |
+ tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1; |
while (*(tempptr + 1) != 0) { |
/* the last secondary collation element is not checked since it is not |
part of the compression */ |
- if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) { |
- log_err("Tertiary compression overlapped\n"); |
+ if (*tempptr < TER_ONLY_COMMON_MIDDLE) { |
+ log_err("Tertiary top down compression overlapped\n"); |
} |
tempptr ++; |
} |
@@ -2628,26 +1331,26 @@ static void TestCompressOverlap(void) { |
/* bottom up compression ------------------------------------- */ |
secstr[count] = 0; |
tertstr[count] = 0; |
- resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); |
- tempptr = uprv_strchr(result, 1) + 1; |
+ resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result)); |
+ tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1; |
while (*(tempptr + 1) != 1) { |
/* the last secondary collation element is not checked since it is not |
part of the compression */ |
- if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) { |
- log_err("Secondary compression overlapped\n"); |
+ if (*tempptr > SEC_COMMON_MIDDLE) { |
+ log_err("Secondary bottom up compression overlapped\n"); |
} |
tempptr ++; |
} |
/* tertiary top/bottom/common for en_US is similar to the secondary |
top/bottom/common */ |
- resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); |
- tempptr = uprv_strrchr(result, 1) + 1; |
+ resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result)); |
+ tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1; |
while (*(tempptr + 1) != 0) { |
/* the last secondary collation element is not checked since it is not |
part of the compression */ |
- if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) { |
- log_err("Tertiary compression overlapped\n"); |
+ if (*tempptr > TER_ONLY_COMMON_MIDDLE) { |
+ log_err("Tertiary bottom up compression overlapped\n"); |
} |
tempptr ++; |
} |
@@ -2707,6 +1410,13 @@ static void TestContraction(void) { |
{0x0063 /* 'c' */, 0x0068 /* 'h' */}, |
{0x0063 /* 'c' */, 0x006C /* 'l' */} |
}; |
+#if 0 |
+ /* |
+ * These pairs of rule strings are not guaranteed to yield the very same mappings. |
+ * In fact, LDML 24 recommends an improved way of creating mappings |
+ * which always yields different mappings for such pairs. See |
+ * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings |
+ */ |
const static char *testrules3[] = { |
"&z < xyz &xyzw << B", |
"&z < xyz &xyz << B / w", |
@@ -2717,6 +1427,7 @@ static void TestContraction(void) { |
"&a\\ud800\\udc00m << B", |
"&a << B / \\ud800\\udc00m", |
}; |
+#endif |
UErrorCode status = U_ZERO_ERROR; |
UCollator *coll; |
@@ -2782,8 +1493,9 @@ static void TestContraction(void) { |
return; |
} |
ucol_close(coll); |
- |
+#if 0 /* see above */ |
for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) { |
+ log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]); |
UCollator *coll1, |
*coll2; |
UCollationElements *iter1, |
@@ -2810,8 +1522,11 @@ static void TestContraction(void) { |
return; |
} |
while (ce != UCOL_NULLORDER) { |
- if (ce != (uint32_t)ucol_next(iter2, &status)) { |
- log_err("CEs does not match\n"); |
+ uint32_t ce2 = (uint32_t)ucol_next(iter2, &status); |
+ if (ce == ce2) { |
+ log_verbose("CEs match: %08x\n", ce); |
+ } else { |
+ log_err("CEs do not match: %08x vs. %08x\n", ce, ce2); |
return; |
} |
ce = ucol_next(iter1, &status); |
@@ -2829,11 +1544,23 @@ static void TestContraction(void) { |
ucol_close(coll1); |
ucol_close(coll2); |
} |
+#endif |
} |
static void TestExpansion(void) { |
const static char *testrules[] = { |
+#if 0 |
+ /* |
+ * This seems to have tested that M was not mapped to an expansion. |
+ * I believe the old builder just did that because it computed the extension CEs |
+ * at the very end, which was a bug. |
+ * Among other problems, it violated the core tailoring principle |
+ * by making an earlier rule depend on a later one. |
+ * And, of course, if M did not get an expansion, then it was primary different from K, |
+ * unlike what the rule &K<<M says. |
+ */ |
"&J << K / B & K << M", |
+#endif |
"&J << K / B << M" |
}; |
const static UChar testdata[][3] = { |
@@ -2983,207 +1710,81 @@ static void TestBocsuCoverage(void) { |
static void TestVariableTopSetting(void) { |
UErrorCode status = U_ZERO_ERROR; |
- const UChar *current = NULL; |
uint32_t varTopOriginal = 0, varTop1, varTop2; |
UCollator *coll = ucol_open("", &status); |
if(U_SUCCESS(status)) { |
- uint32_t strength = 0; |
- uint16_t specs = 0; |
- uint32_t chOffset = 0; |
- uint32_t chLen = 0; |
- uint32_t exOffset = 0; |
- uint32_t exLen = 0; |
- uint32_t oldChOffset = 0; |
- uint32_t oldChLen = 0; |
- uint32_t oldExOffset = 0; |
- uint32_t oldExLen = 0; |
- uint32_t prefixOffset = 0; |
- uint32_t prefixLen = 0; |
- |
- UBool startOfRules = TRUE; |
- UColTokenParser src; |
- UColOptionSet opts; |
- |
- UChar *rulesCopy = NULL; |
- uint32_t rulesLen; |
- |
- UCollationResult result; |
+ static const UChar nul = 0; |
+ static const UChar space = 0x20; |
+ static const UChar dot = 0x2e; /* punctuation */ |
+ static const UChar degree = 0xb0; /* symbol */ |
+ static const UChar dollar = 0x24; /* currency symbol */ |
+ static const UChar zero = 0x30; /* digit */ |
- UChar first[256] = { 0 }; |
- UChar second[256] = { 0 }; |
- UParseError parseError; |
- int32_t myQ = getTestOption(QUICK_OPTION); |
- |
- (void)prefixLen; /* Suppress set but not used warnings. */ |
- (void)prefixOffset; |
- (void)specs; |
- |
- uprv_memset(&src, 0, sizeof(UColTokenParser)); |
- |
- src.opts = &opts; |
+ varTopOriginal = ucol_getVariableTop(coll, &status); |
+ log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal); |
+ ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); |
- if(getTestOption(QUICK_OPTION) <= 0) { |
- setTestOption(QUICK_OPTION, 1); |
+ varTop1 = ucol_setVariableTop(coll, &space, 1, &status); |
+ varTop2 = ucol_getVariableTop(coll, &status); |
+ log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1); |
+ if(U_FAILURE(status) || varTop1 != varTop2 || |
+ !ucol_equal(coll, &nul, 0, &space, 1) || |
+ ucol_equal(coll, &nul, 0, &dot, 1) || |
+ ucol_equal(coll, &nul, 0, °ree, 1) || |
+ ucol_equal(coll, &nul, 0, &dollar, 1) || |
+ ucol_equal(coll, &nul, 0, &zero, 1) || |
+ ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) { |
+ log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status)); |
} |
- /* this test will fail when normalization is turned on */ |
- /* therefore we always turn off exhaustive mode for it */ |
- { /* QUICK > 0*/ |
- log_verbose("Slide variable top over UCARules\n"); |
- rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0); |
- rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); |
- rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE); |
- |
- if(U_SUCCESS(status) && rulesLen > 0) { |
- ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); |
- src.current = src.source = rulesCopy; |
- src.end = rulesCopy+rulesLen; |
- src.extraCurrent = src.end; |
- src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; |
- |
- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to |
- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ |
- while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) { |
- strength = src.parsedToken.strength; |
- chOffset = src.parsedToken.charsOffset; |
- chLen = src.parsedToken.charsLen; |
- exOffset = src.parsedToken.extensionOffset; |
- exLen = src.parsedToken.extensionLen; |
- prefixOffset = src.parsedToken.prefixOffset; |
- prefixLen = src.parsedToken.prefixLen; |
- specs = src.parsedToken.flags; |
- |
- startOfRules = FALSE; |
- { |
- log_verbose("%04X %d ", *(src.source+chOffset), chLen); |
- } |
- if(strength == UCOL_PRIMARY) { |
- status = U_ZERO_ERROR; |
- varTopOriginal = ucol_getVariableTop(coll, &status); |
- varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status); |
- if(U_FAILURE(status)) { |
- char buffer[256]; |
- char *buf = buffer; |
- uint32_t i = 0, j; |
- uint32_t CE = UCOL_NO_MORE_CES; |
- |
- /* before we start screaming, let's see if there is a problem with the rules */ |
- UErrorCode collIterateStatus = U_ZERO_ERROR; |
- collIterate *s = uprv_new_collIterate(&collIterateStatus); |
- uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus); |
- |
- CE = ucol_getNextCE(coll, s, &status); |
- (void)CE; /* Suppress set but not used warning. */ |
- |
- for(i = 0; i < oldChLen; i++) { |
- j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i)); |
- buf += j; |
- } |
- if(status == U_PRIMARY_TOO_LONG_ERROR) { |
- log_verbose("= Expected failure for %s =", buffer); |
- } else { |
- if(uprv_collIterateAtEnd(s)) { |
- log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n", |
- oldChOffset, u_errorName(status), buffer); |
- } else { |
- log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n", |
- buffer); |
- } |
- } |
- uprv_delete_collIterate(s); |
- } |
- varTop2 = ucol_getVariableTop(coll, &status); |
- if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) { |
- log_err("cannot retrieve set varTop value!\n"); |
- continue; |
- } |
+ varTop1 = ucol_setVariableTop(coll, &dot, 1, &status); |
+ varTop2 = ucol_getVariableTop(coll, &status); |
+ log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1); |
+ if(U_FAILURE(status) || varTop1 != varTop2 || |
+ !ucol_equal(coll, &nul, 0, &space, 1) || |
+ !ucol_equal(coll, &nul, 0, &dot, 1) || |
+ ucol_equal(coll, &nul, 0, °ree, 1) || |
+ ucol_equal(coll, &nul, 0, &dollar, 1) || |
+ ucol_equal(coll, &nul, 0, &zero, 1) || |
+ ucol_greaterOrEqual(coll, &dot, 1, °ree, 1)) { |
+ log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status)); |
+ } |
- if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) { |
- |
- u_strncpy(first, src.source+oldChOffset, oldChLen); |
- u_strncpy(first+oldChLen, src.source+chOffset, chLen); |
- u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen); |
- first[2*oldChLen+chLen] = 0; |
- |
- if(oldExLen == 0) { |
- u_strncpy(second, src.source+chOffset, chLen); |
- second[chLen] = 0; |
- } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */ |
- u_strncpy(second, src.source+oldExOffset, oldExLen); |
- u_strncpy(second+oldChLen, src.source+chOffset, chLen); |
- u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen); |
- second[2*oldExLen+chLen] = 0; |
- } |
- result = ucol_strcoll(coll, first, -1, second, -1); |
- if(result == UCOL_EQUAL) { |
- doTest(coll, first, second, UCOL_EQUAL); |
- } else { |
- log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset)); |
- } |
- } |
- } |
- if(strength != UCOL_TOK_RESET) { |
- oldChOffset = chOffset; |
- oldChLen = chLen; |
- oldExOffset = exOffset; |
- oldExLen = exLen; |
- } |
- } |
- status = U_ZERO_ERROR; |
- } |
- else { |
- log_err("Unexpected failure getting rules %s\n", u_errorName(status)); |
- return; |
- } |
- if (U_FAILURE(status)) { |
- log_err("Error parsing rules %s\n", u_errorName(status)); |
- return; |
- } |
- status = U_ZERO_ERROR; |
+ varTop1 = ucol_setVariableTop(coll, °ree, 1, &status); |
+ varTop2 = ucol_getVariableTop(coll, &status); |
+ log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1); |
+ if(U_FAILURE(status) || varTop1 != varTop2 || |
+ !ucol_equal(coll, &nul, 0, &space, 1) || |
+ !ucol_equal(coll, &nul, 0, &dot, 1) || |
+ !ucol_equal(coll, &nul, 0, °ree, 1) || |
+ ucol_equal(coll, &nul, 0, &dollar, 1) || |
+ ucol_equal(coll, &nul, 0, &zero, 1) || |
+ ucol_greaterOrEqual(coll, °ree, 1, &dollar, 1)) { |
+ log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status)); |
} |
- setTestOption(QUICK_OPTION, myQ); |
+ varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status); |
+ varTop2 = ucol_getVariableTop(coll, &status); |
+ log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1); |
+ if(U_FAILURE(status) || varTop1 != varTop2 || |
+ !ucol_equal(coll, &nul, 0, &space, 1) || |
+ !ucol_equal(coll, &nul, 0, &dot, 1) || |
+ !ucol_equal(coll, &nul, 0, °ree, 1) || |
+ !ucol_equal(coll, &nul, 0, &dollar, 1) || |
+ ucol_equal(coll, &nul, 0, &zero, 1) || |
+ ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) { |
+ log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status)); |
+ } |
log_verbose("Testing setting variable top to contractions\n"); |
{ |
- UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos); |
- int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth; |
- while(*conts != 0) { |
- /* |
- * A continuation is NUL-terminated and NUL-padded |
- * except if it has the maximum length. |
- */ |
- int32_t contractionLength = maxUCAContractionLength; |
- while(contractionLength > 0 && conts[contractionLength - 1] == 0) { |
- --contractionLength; |
- } |
- if(*(conts+1)==0) { /* pre-context */ |
- varTop1 = ucol_setVariableTop(coll, conts, 1, &status); |
- } else { |
- varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status); |
- } |
- if(U_FAILURE(status)) { |
- if(status == U_PRIMARY_TOO_LONG_ERROR) { |
- /* ucol_setVariableTop() is documented to not accept 3-byte primaries, |
- * therefore it is not an error when it complains about them. */ |
- log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n", |
- *conts, *(conts+1), *(conts+2)); |
- } else { |
- log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n", |
- *conts, *(conts+1), *(conts+2), u_errorName(status)); |
- } |
- status = U_ZERO_ERROR; |
- } |
- conts+=maxUCAContractionLength; |
- } |
- |
- status = U_ZERO_ERROR; |
- |
+ UChar first[4] = { 0 }; |
first[0] = 0x0040; |
first[1] = 0x0050; |
first[2] = 0x0000; |
+ status = U_ZERO_ERROR; |
ucol_setVariableTop(coll, first, -1, &status); |
if(U_SUCCESS(status)) { |
@@ -3203,21 +1804,110 @@ static void TestVariableTopSetting(void) { |
log_verbose("Testing calling with error set\n"); |
status = U_INTERNAL_PROGRAM_ERROR; |
- varTop1 = ucol_setVariableTop(coll, first, 1, &status); |
+ varTop1 = ucol_setVariableTop(coll, &space, 1, &status); |
varTop2 = ucol_getVariableTop(coll, &status); |
ucol_restoreVariableTop(coll, varTop2, &status); |
- varTop1 = ucol_setVariableTop(NULL, first, 1, &status); |
+ varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status); |
varTop2 = ucol_getVariableTop(NULL, &status); |
ucol_restoreVariableTop(NULL, varTop2, &status); |
if(status != U_INTERNAL_PROGRAM_ERROR) { |
log_err("Bad reaction to passed error!\n"); |
} |
- uprv_free(src.source); |
ucol_close(coll); |
} else { |
log_data_err("Couldn't open UCA collator\n"); |
} |
+} |
+/* Exercises ucol_setMaxVariable()/ucol_getMaxVariable() on the root collator, one variable group at a time. */ |
+static void TestMaxVariable(void) { |
+    UErrorCode status = U_ZERO_ERROR; |
+    UColReorderCode oldMax, max; |
+    UCollator *coll; |
+    /* Probe characters; nul is only ever passed with length 0, i.e. as the empty string. */ |
+    static const UChar nul = 0; |
+    static const UChar space = 0x20; |
+    static const UChar dot = 0x2e; /* punctuation */ |
+    static const UChar degree = 0xb0; /* symbol */ |
+    static const UChar dollar = 0x24; /* currency symbol */ |
+    static const UChar zero = 0x30; /* digit */ |
+ |
+    coll = ucol_open("", &status); |
+    if(U_FAILURE(status)) { |
+        log_data_err("Couldn't open root collator\n"); |
+        return; |
+    } |
+    /* Remember the initial value so it can be restored and verified at the end. */ |
+    oldMax = ucol_getMaxVariable(coll); |
+    log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax); |
+    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); |
+    /* Each section requires: probes up to the chosen group equal the empty string, higher probes do not, and the group's probe sorts before the next one. */ |
+    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status); |
+    max = ucol_getMaxVariable(coll); |
+    log_verbose("ucol_setMaxVariable(space) -> %04x\n", max); |
+    if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE || |
+            !ucol_equal(coll, &nul, 0, &space, 1) || |
+            ucol_equal(coll, &nul, 0, &dot, 1) || |
+            ucol_equal(coll, &nul, 0, &degree, 1) || |
+            ucol_equal(coll, &nul, 0, &dollar, 1) || |
+            ucol_equal(coll, &nul, 0, &zero, 1) || |
+            ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) { |
+        log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status)); |
+    } |
+ |
+    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status); |
+    max = ucol_getMaxVariable(coll); |
+    log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max); |
+    if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION || |
+            !ucol_equal(coll, &nul, 0, &space, 1) || |
+            !ucol_equal(coll, &nul, 0, &dot, 1) || |
+            ucol_equal(coll, &nul, 0, &degree, 1) || |
+            ucol_equal(coll, &nul, 0, &dollar, 1) || |
+            ucol_equal(coll, &nul, 0, &zero, 1) || |
+            ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) { |
+        log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status)); |
+    } |
+ |
+    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status); |
+    max = ucol_getMaxVariable(coll); |
+    log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max); |
+    if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL || |
+            !ucol_equal(coll, &nul, 0, &space, 1) || |
+            !ucol_equal(coll, &nul, 0, &dot, 1) || |
+            !ucol_equal(coll, &nul, 0, &degree, 1) || |
+            ucol_equal(coll, &nul, 0, &dollar, 1) || |
+            ucol_equal(coll, &nul, 0, &zero, 1) || |
+            ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) { |
+        log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status)); |
+    } |
+ |
+    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status); |
+    max = ucol_getMaxVariable(coll); |
+    log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max); |
+    if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY || |
+            !ucol_equal(coll, &nul, 0, &space, 1) || |
+            !ucol_equal(coll, &nul, 0, &dot, 1) || |
+            !ucol_equal(coll, &nul, 0, &degree, 1) || |
+            !ucol_equal(coll, &nul, 0, &dollar, 1) || |
+            ucol_equal(coll, &nul, 0, &zero, 1) || |
+            ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) { |
+        log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status)); |
+    } |
+ |
+    log_verbose("Test restoring maxVariable\n"); |
+    status = U_ZERO_ERROR; /* clear any failure left over from the sections above */ |
+    ucol_setMaxVariable(coll, oldMax, &status); |
+    if(oldMax != ucol_getMaxVariable(coll)) { |
+        log_err("Couldn't restore old maxVariable\n"); |
+    } |
+    log_verbose("Testing calling with error set\n"); |
+    status = U_INTERNAL_PROGRAM_ERROR; |
+    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status); |
+    max = ucol_getMaxVariable(coll); |
+    if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) { /* setter must change neither the value nor the status */ |
+        log_err("Bad reaction to passed error!\n"); |
+    } |
+    ucol_close(coll); |
} |
static void TestNonChars(void) { |
@@ -3702,6 +2392,8 @@ static void TestRuleOptions(void) { |
const char *data[10]; |
const uint32_t len; |
} tests[] = { |
+#if 0 |
+ /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */ |
/* - all befores here amount to zero */ |
{ "&[before 3][first tertiary ignorable]<<<a", |
{ "\\u0000", "a"}, 2 |
@@ -3710,25 +2402,35 @@ static void TestRuleOptions(void) { |
{ "&[before 3][last tertiary ignorable]<<<a", |
{ "\\u0000", "a"}, 2 |
}, /* you cannot go before last tertiary ignorable */ |
- |
+#endif |
+ /* |
+ * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt), |
+ * and it *is* possible to "go before" that. |
+ */ |
{ "&[before 3][first secondary ignorable]<<<a", |
{ "\\u0000", "a"}, 2 |
- }, /* you cannot go before first secondary ignorable */ |
+ }, |
{ "&[before 3][last secondary ignorable]<<<a", |
{ "\\u0000", "a"}, 2 |
- }, /* you cannot go before first secondary ignorable */ |
+ }, |
/* 'normal' befores */ |
- { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a", |
+ /* |
+ * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt, |
+ * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a |
+ * because there is no tailoring space before that boundary. |
+ * Made the tests work by tailoring to a space instead. |
+ */ |
+ { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */ |
{ "c", "b", "\\u0332", "a" }, 4 |
}, |
/* we don't have a code point that corresponds to |
* the last primary ignorable |
*/ |
- { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a", |
+ { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */ |
{ "\\u0332", "\\u20e3", "c", "b", "a" }, 5 |
}, |
@@ -3754,14 +2456,14 @@ static void TestRuleOptions(void) { |
"&[first implicit]<a", |
{ "b", "\\u4e00", "a", "\\u4e01"}, 4 |
}, |
- |
+#if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */ |
{ "&[before 1][last implicit]<b" |
"&[last implicit]<a", |
{ "b", "\\U0010FFFD", "a" }, 3 |
}, |
- |
+#endif |
{ "&[last variable]<z" |
- "&[last primary ignorable]<x" |
+ "&' '<x" /* was &[last primary ignorable]<x, see above */ |
"&[last secondary ignorable]<<y" |
"&[last tertiary ignorable]<<<w" |
"&[top]<u", |
@@ -4007,7 +2709,7 @@ static void TestPartialSortKeyTermination(void) { |
"\\udc00\\ud800\\ud800" |
}; |
- int32_t i = sizeof(UCollator); |
+ int32_t i; |
UErrorCode status = U_ZERO_ERROR; |
@@ -4081,7 +2783,7 @@ static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCo |
errorNo++; |
} |
ucol_close(target); |
- if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) { |
+ if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) { |
target = ucol_safeClone(source, NULL, NULL, &status); |
if(U_FAILURE(status)) { |
log_err("Error creating clone\n"); |
@@ -4116,7 +2818,8 @@ static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCo |
errorNo++; |
return errorNo; |
} |
- if(!ucol_equals(source, target)) { |
+ /* Note: The tailoring rule string is an optional data item. */ |
+ if(!ucol_equals(source, target) && sourceRulesLen != 0) { |
log_err("Collator different from collator that was created from the same rules\n"); |
errorNo++; |
} |
@@ -4128,7 +2831,7 @@ static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCo |
static void TestEquals(void) { |
/* ucol_equals is not currently a public API. There is a chance that it will become |
- * something like this, but currently it is only used by RuleBasedCollator::operator== |
+ * something like this. |
*/ |
/* test whether the two collators instantiated from the same locale are equal */ |
UErrorCode status = U_ZERO_ERROR; |
@@ -4183,8 +2886,8 @@ static void TestEquals(void) { |
if(!ucol_equals(source, source)) { |
log_err("Same collator not equal\n"); |
} |
- if(TestEqualsForCollator(locName, source, target)) { |
- log_err("Errors for root\n", locName); |
+ if(TestEqualsForCollator("root", source, target)) { |
+ log_err("Errors for root\n"); |
} |
ucol_close(source); |
@@ -4399,83 +3102,6 @@ static void TestPinyinProblem(void) { |
genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0])); |
} |
-#define TST_UCOL_MAX_INPUT 0x220001 |
-#define topByte 0xFF000000; |
-#define bottomByte 0xFF; |
-#define fourBytes 0xFFFFFFFF; |
- |
- |
-static void showImplicit(UChar32 i) { |
- if (i >= 0 && i <= TST_UCOL_MAX_INPUT) { |
- log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i)); |
- } |
-} |
- |
-static void TestImplicitGeneration(void) { |
- UErrorCode status = U_ZERO_ERROR; |
- UChar32 last = 0; |
- UChar32 current; |
- UChar32 i = 0, j = 0; |
- UChar32 roundtrip = 0; |
- UChar32 lastBottom = 0; |
- UChar32 currentBottom = 0; |
- UChar32 lastTop = 0; |
- UChar32 currentTop = 0; |
- |
- UCollator *coll = ucol_open("root", &status); |
- if(U_FAILURE(status)) { |
- log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); |
- return; |
- } |
- |
- uprv_uca_getRawFromImplicit(0xE20303E7); |
- |
- for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) { |
- current = uprv_uca_getImplicitFromRaw(i) & fourBytes; |
- |
- /* check that it round-trips AND that all intervening ones are illegal*/ |
- roundtrip = uprv_uca_getRawFromImplicit(current); |
- if (roundtrip != i) { |
- log_err("No roundtrip %08X\n", i); |
- } |
- if (last != 0) { |
- for (j = last + 1; j < current; ++j) { |
- roundtrip = uprv_uca_getRawFromImplicit(j); |
- /* raise an error if it *doesn't* find an error*/ |
- if (roundtrip != -1) { |
- log_err("Fails to recognize illegal %08X\n", j); |
- } |
- } |
- } |
- /* now do other consistency checks*/ |
- lastBottom = last & bottomByte; |
- currentBottom = current & bottomByte; |
- lastTop = last & topByte; |
- currentTop = current & topByte; |
- (void)lastBottom; /* Suppress set but not used warnings. */ |
- (void)currentBottom; |
- |
- /* print out some values for spot-checking*/ |
- if (lastTop != currentTop || i == 0x10000 || i == 0x110000) { |
- showImplicit(i-3); |
- showImplicit(i-2); |
- showImplicit(i-1); |
- showImplicit(i); |
- showImplicit(i+1); |
- showImplicit(i+2); |
- } |
- last = current; |
- |
- if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) { |
- log_err("No raw <-> code point roundtrip for 0x%08X\n", i); |
- } |
- } |
- showImplicit(TST_UCOL_MAX_INPUT-2); |
- showImplicit(TST_UCOL_MAX_INPUT-1); |
- showImplicit(TST_UCOL_MAX_INPUT); |
- ucol_close(coll); |
-} |
- |
/** |
* Iterate through the given iterator, checking to see that all the strings |
* in the expected array are present. |
@@ -4602,8 +3228,8 @@ ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, |
&isAvailable, &ec); |
if (assertSuccess("getFunctionalEquivalent", &ec)) { |
assertEquals("getFunctionalEquivalent(de_DE)", "root", loc); |
- assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE", |
- isAvailable == TRUE); |
+ assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE", |
+ isAvailable == FALSE); |
} |
} |
@@ -4955,9 +3581,20 @@ TestVI5913(void) |
UCollator *coll =NULL; |
uint8_t resColl[100], expColl[100]; |
int32_t rLen, tLen, ruleLen, sLen, kLen; |
- UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/ |
+ UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/ |
UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/ |
- UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/ |
+ /* |
+ * Note: Just tailoring &z<ae^ does not work as expected: |
+ * The UCA spec requires for discontiguous contractions that they |
+ * extend an *existing match* by one combining mark at a time. |
+ * Therefore, ae must be a contraction so that the builder finds |
+ * discontiguous contractions for ae^, for example with an intervening underdot. |
+ * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc. |
+ */ |
+ UChar rule3[256]={ |
+ 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */ |
+ 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/ |
+ 0}; |
static const UChar tData[][20]={ |
{0x1EAC, 0}, |
{0x0041, 0x0323, 0x0302, 0}, |
@@ -5098,18 +3735,22 @@ TestVI5913(void) |
coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); |
tLen = u_strlen(tailorData3[3]); |
kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100); |
+ log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen); |
+ for(i = 0; i<kLen; i++) { |
+ log_verbose(" %02X", expColl[i]); |
+ } |
for (j=4; j<6; j++) { |
tLen = u_strlen(tailorData3[j]); |
rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100); |
if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { |
- log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); |
+ log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen); |
for(i = 0; i<rLen; i++) { |
log_err(" %02X", resColl[i]); |
} |
} |
- log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); |
+ log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen); |
for(i = 0; i<rLen; i++) { |
log_verbose(" %02X", resColl[i]); |
} |
@@ -5153,11 +3794,15 @@ TestTailor6179(void) |
/* |
* These values from FractionalUCA.txt will change, |
* and need to be updated here. |
+ * TODO: Make this not check for particular sort keys. |
+ * Instead, test that we get CEs before & after other ignorables; see ticket #6179. |
*/ |
- static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0}; |
- static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0}; |
- static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0}; |
- static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0}; |
+ static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0}; |
+ static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0}; |
+ static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0}; |
+ static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0}; |
+ |
+ UParseError parseError; |
/* Test [Last Primary ignorable] */ |
@@ -5191,10 +3836,12 @@ TestTailor6179(void) |
/* Test [Last Secondary ignorable] */ |
log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n"); |
- ruleLen = u_strlen(rule1); |
- coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); |
+ ruleLen = u_strlen(rule2); |
+ coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status); |
if (U_FAILURE(status)) { |
log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status)); |
+ log_info(" offset=%d \"%s\" | \"%s\"\n", |
+ parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1)); |
return; |
} |
tLen = u_strlen(tData2[0]); |
@@ -5206,16 +3853,14 @@ TestTailor6179(void) |
} |
log_err("\n"); |
} |
- if(!log_knownIssue("8982", "debug and fix")) { /* TODO: debug & fix, see ticket #8982 */ |
- tLen = u_strlen(tData2[1]); |
- rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100); |
- if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) { |
- log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen); |
- for(i = 0; i<rLen; i++) { |
- log_err(" %02X", resColl[i]); |
- } |
- log_err("\n"); |
+ tLen = u_strlen(tData2[1]); |
+ rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100); |
+ if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) { |
+ log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen); |
+ for(i = 0; i<rLen; i++) { |
+ log_err(" %02X", resColl[i]); |
} |
+ log_err("\n"); |
} |
ucol_close(coll); |
} |
@@ -5582,6 +4227,10 @@ static void doTestOneTestCase(const OneTestCase testcases[], |
myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status); |
if(U_FAILURE(status)){ |
log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); |
+ log_info(" offset=%d \"%s\" | \"%s\"\n", |
+ parse_error.offset, |
+ aescstrdup(parse_error.preContext, -1), |
+ aescstrdup(parse_error.postContext, -1)); |
return; |
} |
log_verbose("Testing the <<* syntax\n"); |
@@ -5627,13 +4276,13 @@ const static OneTestCase rangeTestcases[] = { |
static int nRangeTestcases = LEN(rangeTestcases); |
const static OneTestCase rangeTestcasesSupplemental[] = { |
- { {0xfffe}, {0xffff}, UCOL_LESS }, /* U+FFFE < U+FFFF */ |
- { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFF < U+10000 */ |
+ { {0x4e00}, {0xfffb}, UCOL_LESS }, /* U+4E00 < U+FFFB */ |
+ { {0xfffb}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFB < U+10000 */ |
{ {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */ |
- { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+FFFE < U+10001 */ |
+ { {0x4e00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+4E00 < U+10001 */ |
{ {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */ |
{ {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */ |
- { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+FFFE < U+10001 */ |
+ { {0x4e00}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+4E00 < U+10002 */ |
}; |
static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental); |
@@ -5690,10 +4339,10 @@ static void TestSameStrengthListQuoted(void) |
static void TestSameStrengthListSupplemental(void) |
{ |
const char* strRules[] = { |
- "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002", |
- "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02", |
- "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002", |
- "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02", |
+ "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002", /* explicit "<" chain; supplementary code points as 8-digit escapes */ |
+ "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02", /* same chain; supplementary code points as surrogate pairs */ |
+ "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002", /* "<*" list form; 8-digit escapes */ |
+ "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02", /* "<*" list form; surrogate pairs */ |
}; |
doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules)); |
} |
@@ -5741,7 +4390,8 @@ static void TestSameStrengthListRanges(void) |
static void TestSameStrengthListSupplementalRanges(void) |
{ |
const char* strRules[] = { |
- "&\\ufffe<*\\uffff-\\U00010002", |
+ /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */ |
+ "&\\u4e00<*\\ufffb\\U00010000-\\U00010002", /* U+FFFB listed singly, then the range U+10000..U+10002 */ |
}; |
doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules)); |
} |
@@ -6041,6 +4691,7 @@ static void TestReorderingAPI(void) |
int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; |
int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS}; |
int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; |
+ int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE; |
UCollationResult collResult; |
int32_t retrievedReorderCodesLength; |
int32_t retrievedReorderCodes[10]; |
@@ -6118,6 +4769,22 @@ static void TestReorderingAPI(void) |
return; |
} |
+ /* clear the reordering using [NONE] */ |
+ ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status); |
+ if (U_FAILURE(status)) { |
+ log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status)); |
+ return; |
+ } |
+ |
+ /* get the reordering again */ |
+ retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); |
+ if (retrievedReorderCodesLength != 0) { |
+ log_err_status(status, |
+ "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n", |
+ retrievedReorderCodesLength); |
+ return; |
+ } |
+ |
/* test for error condition on duplicate reorder codes */ |
ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status); |
if (!U_FAILURE(status)) { |
@@ -6272,17 +4939,22 @@ static void TestReorderingAPIWithRuleCreatedCollator(void) |
ucol_close(myCollation); |
} |
-static int compareUScriptCodes(const void * a, const void * b) |
-{ |
- return ( *(int32_t*)a - *(int32_t*)b ); |
+static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) { |
+    /* Linear scan over the first `length` entries of scripts[]; |
+     * returns TRUE as soon as expectedScript is found, FALSE if the scan runs off the end. */ |
+    int32_t idx = 0; |
+    while (idx < length && scripts[idx] != expectedScript) { ++idx; } |
+    return idx < length ? TRUE : FALSE; |
} |
static void TestEquivalentReorderingScripts(void) { |
UErrorCode status = U_ZERO_ERROR; |
- int32_t equivalentScripts[50]; |
- int32_t equivalentScriptsLength; |
- int loopIndex; |
- int32_t equivalentScriptsResult[] = { |
+ int32_t equivalentScripts[100]; |
+ int32_t length; |
+ int i; |
+ int32_t prevScript; |
+ /* At least these scripts are expected to be equivalent. There may be more. */ |
+ static const int32_t expectedScripts[] = { |
USCRIPT_BOPOMOFO, |
USCRIPT_LISU, |
USCRIPT_LYCIAN, |
@@ -6311,46 +4983,49 @@ static void TestEquivalentReorderingScripts(void) { |
USCRIPT_MEROITIC_HIEROGLYPHS |
}; |
- qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes); |
- |
/* UScript.GOTHIC */ |
- equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status); |
+ length = ucol_getEquivalentReorderCodes( |
+ USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status); |
if (U_FAILURE(status)) { |
- log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status)); |
+ log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status)); |
return; |
} |
- /* |
- fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); |
- fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength); |
- for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { |
- fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]); |
+ if (length < LEN(expectedScripts)) { |
+ log_err("ERROR/Gothic: retrieved equivalent script length wrong: " |
+ "expected at least %d, was = %d\n", |
+ LEN(expectedScripts), length); |
} |
- */ |
- if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { |
- log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength); |
- return; |
+ prevScript = -1; |
+ for (i = 0; i < length; ++i) { |
+ int32_t script = equivalentScripts[i]; |
+ if (script <= prevScript) { |
+ log_err("ERROR/Gothic: equivalent scripts out of order at index %d\n", i); |
+ } |
+ prevScript = script; |
} |
- for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { |
- if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) { |
- log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]); |
- return; |
+ for (i = 0; i < LEN(expectedScripts); i++) { |
+ if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) { |
+ log_err("ERROR/Gothic: equivalent scripts do not contain %d\n", |
+ expectedScripts[i]); |
} |
} |
/* UScript.SHAVIAN */ |
- equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status); |
+ length = ucol_getEquivalentReorderCodes( |
+ USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status); |
if (U_FAILURE(status)) { |
- log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status)); |
+ log_err_status(status, "ERROR/Shavian: retrieving equivalent reorder codes: %s\n", myErrorName(status)); |
return; |
} |
- if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { |
- log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength); |
- return; |
+ if (length < LEN(expectedScripts)) { |
+ log_err("ERROR/Shavian: retrieved equivalent script length wrong: " |
+ "expected at least %d, was = %d\n", |
+ LEN(expectedScripts), length); |
} |
- for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { |
- if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) { |
- log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]); |
- return; |
+ for (i = 0; i < LEN(expectedScripts); i++) { |
+ if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) { |
+ log_err("ERROR/Shavian: equivalent scripts do not contain %d\n", |
+ expectedScripts[i]); |
} |
} |
} |
@@ -6854,6 +5529,11 @@ static void TestImport(void) |
} |
virules = (UChar*) ucol_getRules(vicoll, &viruleslength); |
+ if(viruleslength == 0) { |
+ log_data_err("missing vi tailoring rule string\n"); |
+ ucol_close(vicoll); |
+ return; |
+ } |
escoll = ucol_open("es", &status); |
esrules = (UChar*) ucol_getRules(escoll, &esruleslength); |
viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*)); |
@@ -6953,6 +5633,11 @@ static void TestImportWithType(void) |
return; |
} |
virules = ucol_getRules(vicoll, &viruleslength); |
+ if(viruleslength == 0) { |
+ log_data_err("missing vi tailoring rule string\n"); |
+ ucol_close(vicoll); |
+ return; |
+ } |
/* decoll = ucol_open("de@collation=phonebook", &status); */ |
decoll = ucol_open("de-u-co-phonebk", &status); |
if(U_FAILURE(status)){ |
@@ -7076,7 +5761,7 @@ static const LongUpperStrItem longUpperStrItems[] = { |
{ NULL, 0 } |
}; |
-enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */ |
+enum { kCollKeyLenMax = 850 }; /* may change with collation changes */ |
/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */ |
static void TestCaseLevelBufferOverflow(void) |
@@ -7114,6 +5799,38 @@ static void TestCaseLevelBufferOverflow(void) |
} |
} |
+/* Test for #10595 */ |
+static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */ |
+#define KEY_PART_SIZE 16 /* capacity of the buffer handed to each ucol_nextSortKeyPart() call */ |
+/* Feeds testJapaneseName through ucol_nextSortKeyPart() with a "ja" collator at identical strength; only the error status is checked. */ |
+static void TestNextSortKeyPartJaIdentical(void) |
+{ |
+    UErrorCode status = U_ZERO_ERROR; |
+    UCollator *coll; |
+    uint8_t keyPart[KEY_PART_SIZE]; |
+    UCharIterator iter; |
+    uint32_t state[2] = {0, 0}; |
+    int32_t keyPartLen; |
+ |
+    coll = ucol_open("ja", &status); |
+    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status); |
+    if (U_FAILURE(status)) { |
+        log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status)); |
+        ucol_close(coll); /* do not leak the collator if open succeeded but setAttribute failed */ |
+        return; |
+    } |
+    uiter_setString(&iter, testJapaneseName, -1); /* -1: length is taken from the NUL terminator */ |
+    keyPartLen = KEY_PART_SIZE; /* primes the loop condition */ |
+    while (keyPartLen == KEY_PART_SIZE) { /* keep going while each call fills the whole buffer */ |
+        keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status); |
+        if (U_FAILURE(status)) { |
+            log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status)); |
+            break; |
+        } |
+    } |
+ |
+    ucol_close(coll); |
+} |
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x) |
@@ -7131,6 +5848,7 @@ void addMiscCollTest(TestNode** root) |
TEST(TestExtremeCompression); |
TEST(TestSurrogates); |
TEST(TestVariableTopSetting); |
+ TEST(TestMaxVariable); |
TEST(TestBocsuCoverage); |
TEST(TestCyrillicTailoring); |
TEST(TestCase); |
@@ -7138,9 +5856,6 @@ void addMiscCollTest(TestNode** root) |
TEST(BlackBirdTest); |
TEST(FunkyATest); |
TEST(BillFairmanTest); |
- TEST(RamsRulesTest); |
- TEST(IsTailoredTest); |
- TEST(TestCollations); |
TEST(TestChMove); |
TEST(TestImplicitTailoring); |
TEST(TestFCDProblem); |
@@ -7149,8 +5864,6 @@ void addMiscCollTest(TestNode** root) |
TEST(TestJ815); |
/*TEST(TestJ831);*/ /* we changed lv locale */ |
TEST(TestBefore); |
- TEST(TestRedundantRules); |
- TEST(TestExpansionSyntax); |
TEST(TestHangulTailoring); |
TEST(TestUCARules); |
TEST(TestIncrementalNormalize); |
@@ -7172,7 +5885,6 @@ void addMiscCollTest(TestNode** root) |
TEST(TestNumericCollation); |
TEST(TestTibetanConformance); |
TEST(TestPinyinProblem); |
- TEST(TestImplicitGeneration); |
TEST(TestSeparateTrees); |
TEST(TestBeforePinyin); |
TEST(TestBeforeTightening); |
@@ -7224,6 +5936,7 @@ void addMiscCollTest(TestNode** root) |
TEST(TestReorderWithNumericCollation); |
TEST(TestCaseLevelBufferOverflow); |
+ TEST(TestNextSortKeyPartJaIdentical); |
} |
#endif /* #if !UCONFIG_NO_COLLATION */ |