Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(497)

Unified Diff: source/test/cintltst/cmsccoll.c

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/test/cintltst/cloctst.c ('k') | source/test/cintltst/cmsgtst.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/test/cintltst/cmsccoll.c
diff --git a/source/test/cintltst/cmsccoll.c b/source/test/cintltst/cmsccoll.c
index b47472806b246bc3c294b6ed7f7dbe2014a7577b..d20cd9a93aeed7ccf1123ce532201b071b8d4144 100644
--- a/source/test/cintltst/cmsccoll.c
+++ b/source/test/cintltst/cmsccoll.c
@@ -1,7 +1,7 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 2001-2013, International Business Machines Corporation and
+ * Copyright (c) 2001-2014, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/*******************************************************************************
@@ -29,7 +29,6 @@
#include "unicode/ustring.h"
#include "string.h"
#include "ucol_imp.h"
-#include "ucol_tok.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
@@ -413,1117 +412,6 @@ static void BillFairmanTest(void) {
}
}
-static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
- UChar source[256] = { '\0'};
- UChar target[256] = { '\0'};
- UChar preP = 0x31a3;
- UChar preQ = 0x310d;
-/*
- UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
- UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
-*/
- /*log_verbose("Testing primary\n");*/
-
- doTest(col, p, q, UCOL_LESS);
-/*
- UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
-
- if(result!=UCOL_LESS){
- aescstrdup(p,utfSource,256);
- aescstrdup(q,utfTarget,256);
- fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
- }
-*/
- source[0] = preP;
- u_strcpy(source+1,p);
- target[0] = preQ;
- u_strcpy(target+1,q);
- doTest(col, source, target, UCOL_LESS);
-/*
- fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
-*/
-}
-
-static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
- UChar source[256] = { '\0'};
- UChar target[256] = { '\0'};
-
- /*log_verbose("Testing secondary\n");*/
-
- doTest(col, p, q, UCOL_LESS);
-/*
- fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
-*/
- source[0] = 0x0053;
- u_strcpy(source+1,p);
- target[0]= 0x0073;
- u_strcpy(target+1,q);
-
- doTest(col, source, target, UCOL_LESS);
-/*
- fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
-*/
-
-
- u_strcpy(source,p);
- source[u_strlen(p)] = 0x62;
- source[u_strlen(p)+1] = 0;
-
-
- u_strcpy(target,q);
- target[u_strlen(q)] = 0x61;
- target[u_strlen(q)+1] = 0;
-
- doTest(col, source, target, UCOL_GREATER);
-
-/*
- fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
-*/
-}
-
-static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
- UChar source[256] = { '\0'};
- UChar target[256] = { '\0'};
-
- /*log_verbose("Testing tertiary\n");*/
-
- doTest(col, p, q, UCOL_LESS);
-/*
- fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
-*/
- source[0] = 0x0020;
- u_strcpy(source+1,p);
- target[0]= 0x002D;
- u_strcpy(target+1,q);
-
- doTest(col, source, target, UCOL_LESS);
-/*
- fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
-*/
-
- u_strcpy(source,p);
- source[u_strlen(p)] = 0xE0;
- source[u_strlen(p)+1] = 0;
-
- u_strcpy(target,q);
- target[u_strlen(q)] = 0x61;
- target[u_strlen(q)+1] = 0;
-
- doTest(col, source, target, UCOL_GREATER);
-
-/*
- fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
-*/
-}
-
-static void testEquality(UCollator* col, const UChar* p,const UChar* q){
-/*
- UChar source[256] = { '\0'};
- UChar target[256] = { '\0'};
-*/
-
- doTest(col, p, q, UCOL_EQUAL);
-/*
- fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
-*/
-}
-
-static void testCollator(UCollator *coll, UErrorCode *status) {
- const UChar *rules = NULL, *current = NULL;
- int32_t ruleLen = 0;
- uint32_t strength = 0;
- uint32_t chOffset = 0; uint32_t chLen = 0;
- uint32_t exOffset = 0; uint32_t exLen = 0;
- uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
- uint32_t firstEx = 0;
-/* uint32_t rExpsLen = 0; */
- uint32_t firstLen = 0;
- UBool varT = FALSE; UBool top_ = TRUE;
- uint16_t specs = 0;
- UBool startOfRules = TRUE;
- UBool lastReset = FALSE;
- UBool before = FALSE;
- uint32_t beforeStrength = 0;
- UColTokenParser src;
- UColOptionSet opts;
-
- UChar first[256];
- UChar second[256];
- UChar tempB[256];
- uint32_t tempLen;
- UChar *rulesCopy = NULL;
- UParseError parseError;
-
- uprv_memset(&src, 0, sizeof(UColTokenParser));
-
- src.opts = &opts;
-
- rules = ucol_getRules(coll, &ruleLen);
- if(U_SUCCESS(*status) && ruleLen > 0) {
- rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
- uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
- src.current = src.source = rulesCopy;
- src.end = rulesCopy+ruleLen;
- src.extraCurrent = src.end;
- src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
- *first = *second = 0;
-
- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
- while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
- strength = src.parsedToken.strength;
- chOffset = src.parsedToken.charsOffset;
- chLen = src.parsedToken.charsLen;
- exOffset = src.parsedToken.extensionOffset;
- exLen = src.parsedToken.extensionLen;
- prefixOffset = src.parsedToken.prefixOffset;
- prefixLen = src.parsedToken.prefixLen;
- specs = src.parsedToken.flags;
-
- startOfRules = FALSE;
- varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
- (void)varT; /* Suppress set but not used warning. */
- top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
- if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
- second[0] = 0;
- } else {
- u_strncpy(second,src.source+chOffset, chLen);
- second[chLen] = 0;
-
- if(exLen > 0 && firstEx == 0) {
- u_strncat(first, src.source+exOffset, exLen);
- first[firstLen+exLen] = 0;
- }
-
- if(lastReset == TRUE && prefixLen != 0) {
- u_strncpy(first+prefixLen, first, firstLen);
- u_strncpy(first, src.source+prefixOffset, prefixLen);
- first[firstLen+prefixLen] = 0;
- firstLen = firstLen+prefixLen;
- }
-
- if(before == TRUE) { /* swap first and second */
- u_strcpy(tempB, first);
- u_strcpy(first, second);
- u_strcpy(second, tempB);
-
- tempLen = firstLen;
- firstLen = chLen;
- chLen = tempLen;
-
- tempLen = firstEx;
- firstEx = exLen;
- exLen = tempLen;
- if(beforeStrength < strength) {
- strength = beforeStrength;
- }
- }
- }
- lastReset = FALSE;
-
- switch(strength){
- case UCOL_IDENTICAL:
- testEquality(coll,first,second);
- break;
- case UCOL_PRIMARY:
- testPrimary(coll,first,second);
- break;
- case UCOL_SECONDARY:
- testSecondary(coll,first,second);
- break;
- case UCOL_TERTIARY:
- testTertiary(coll,first,second);
- break;
- case UCOL_TOK_RESET:
- lastReset = TRUE;
- before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
- if(before) {
- beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
- }
- break;
- default:
- break;
- }
-
- if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
- before = FALSE;
- } else {
- firstLen = chLen;
- firstEx = exLen;
- u_strcpy(first, second);
- }
- }
- uprv_free(src.source);
- uprv_free(src.reorderCodes);
- }
-}
-
-static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
- UCollator *UCA = (UCollator *)collator;
- return ucol_strcoll(UCA, source, sLen, target, tLen);
-}
-
-/*
-static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
-#if U_PLATFORM_HAS_WIN32_API
- LCID lcid = (LCID)collator;
- return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
-#else
- return 0;
-#endif
-}
-*/
-
-static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
- UChar s1, UChar s2,
- const UChar *s, const uint32_t sLen,
- const UChar *t, const uint32_t tLen) {
- UChar source[256] = {0};
- UChar target[256] = {0};
-
- source[0] = s1;
- u_strcpy(source+1, s);
- target[0] = s2;
- u_strcpy(target+1, t);
-
- return func(collator, opts, source, sLen+1, target, tLen+1);
-}
-
-static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
- UChar s1, UChar s2,
- const UChar *s, const uint32_t sLen,
- const UChar *t, const uint32_t tLen) {
- UChar source[256] = {0};
- UChar target[256] = {0};
-
- u_strcpy(source, s);
- source[sLen] = s1;
- u_strcpy(target, t);
- target[tLen] = s2;
-
- return func(collator, opts, source, sLen+1, target, tLen+1);
-}
-
-static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
- const UChar *s, const uint32_t sLen,
- const UChar *t, const uint32_t tLen,
- UCollationResult result) {
- /*UChar fPrimary = 0x6d;*/
- /*UChar sPrimary = 0x6e;*/
- UChar fSecondary = 0x310d;
- UChar sSecondary = 0x31a3;
- UChar fTertiary = 0x310f;
- UChar sTertiary = 0x31b7;
-
- UCollationResult oposite;
- if(result == UCOL_EQUAL) {
- return UCOL_IDENTICAL;
- } else if(result == UCOL_GREATER) {
- oposite = UCOL_LESS;
- } else {
- oposite = UCOL_GREATER;
- }
-
- if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
- return UCOL_PRIMARY;
- } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
- (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
- return UCOL_SECONDARY;
- } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
- (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
- return UCOL_TERTIARY;
- } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
- (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
- return UCOL_QUATERNARY;
- } else {
- return UCOL_IDENTICAL;
- }
-}
-
-static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
- uint32_t i = 0;
-
- if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
- buffer[0] = '=';
- buffer[1] = '=';
- buffer[2] = '\0';
- } else if(res == UCOL_GREATER) {
- for(i = 0; i<strength+1; i++) {
- buffer[i] = '>';
- }
- buffer[strength+1] = '\0';
- } else {
- for(i = 0; i<strength+1; i++) {
- buffer[i] = '<';
- }
- buffer[strength+1] = '\0';
- }
-
- return buffer;
-}
-
-
-
-static void logFailure (const char *platform, const char *test,
- const UChar *source, const uint32_t sLen,
- const UChar *target, const uint32_t tLen,
- UCollationResult realRes, uint32_t realStrength,
- UCollationResult expRes, uint32_t expStrength, UBool error) {
-
- uint32_t i = 0;
-
- char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
- static int32_t maxOutputLength = 0;
- int32_t outputLength;
-
- *sEsc = *tEsc = *s = *t = 0;
- if(error == TRUE) {
- log_err("Difference between expected and generated order. Run test with -v for more info\n");
- } else if(getTestOption(VERBOSITY_OPTION) == 0) {
- return;
- }
- for(i = 0; i<sLen; i++) {
- sprintf(b, "%04X", source[i]);
- strcat(sEsc, "\\u");
- strcat(sEsc, b);
- strcat(s, b);
- strcat(s, " ");
- if(source[i] < 0x80) {
- sprintf(b, "(%c)", source[i]);
- strcat(sEsc, b);
- }
- }
- for(i = 0; i<tLen; i++) {
- sprintf(b, "%04X", target[i]);
- strcat(tEsc, "\\u");
- strcat(tEsc, b);
- strcat(t, b);
- strcat(t, " ");
- if(target[i] < 0x80) {
- sprintf(b, "(%c)", target[i]);
- strcat(tEsc, b);
- }
- }
-/*
- strcpy(output, "[[ ");
- strcat(output, sEsc);
- strcat(output, getRelationSymbol(expRes, expStrength, relation));
- strcat(output, tEsc);
-
- strcat(output, " : ");
-
- strcat(output, sEsc);
- strcat(output, getRelationSymbol(realRes, realStrength, relation));
- strcat(output, tEsc);
- strcat(output, " ]] ");
-
- log_verbose("%s", output);
-*/
-
-
- strcpy(output, "DIFF: ");
-
- strcat(output, s);
- strcat(output, " : ");
- strcat(output, t);
-
- strcat(output, test);
- strcat(output, ": ");
-
- strcat(output, sEsc);
- strcat(output, getRelationSymbol(expRes, expStrength, relation));
- strcat(output, tEsc);
-
- strcat(output, " ");
-
- strcat(output, platform);
- strcat(output, ": ");
-
- strcat(output, sEsc);
- strcat(output, getRelationSymbol(realRes, realStrength, relation));
- strcat(output, tEsc);
-
- outputLength = (int32_t)strlen(output);
- if(outputLength > maxOutputLength) {
- maxOutputLength = outputLength;
- U_ASSERT(outputLength < sizeof(output));
- }
-
- log_verbose("%s\n", output);
-
-}
-
-/*
-static void printOutRules(const UChar *rules) {
- uint32_t len = u_strlen(rules);
- uint32_t i = 0;
- char toPrint;
- uint32_t line = 0;
-
- fprintf(stdout, "Rules:");
-
- for(i = 0; i<len; i++) {
- if(rules[i]<0x7f && rules[i]>=0x20) {
- toPrint = (char)rules[i];
- if(toPrint == '&') {
- line = 1;
- fprintf(stdout, "\n&");
- } else if(toPrint == ';') {
- fprintf(stdout, "<<");
- line+=2;
- } else if(toPrint == ',') {
- fprintf(stdout, "<<<");
- line+=3;
- } else {
- fprintf(stdout, "%c", toPrint);
- line++;
- }
- } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
- fprintf(stdout, "\\u%04X", rules[i]);
- line+=6;
- }
- if(line>72) {
- fprintf(stdout, "\n");
- line = 0;
- }
- }
-
- log_verbose("\n");
-
-}
-*/
-
-static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
- uint32_t diffs = 0;
- UCollationResult realResult;
- uint32_t realStrength;
-
- uint32_t sLen = u_strlen(first);
- uint32_t tLen = u_strlen(second);
-
- realResult = func(collator, opts, first, sLen, second, tLen);
- realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
-
- if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
- logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
- diffs++;
- } else if(realResult != UCOL_LESS || realStrength != strength) {
- logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
- diffs++;
- }
- return diffs;
-}
-
-
-static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
- const UChar *rules = NULL, *current = NULL;
- int32_t ruleLen = 0;
- uint32_t strength = 0;
- uint32_t chOffset = 0; uint32_t chLen = 0;
- uint32_t exOffset = 0; uint32_t exLen = 0;
- uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
-/* uint32_t rExpsLen = 0; */
- uint32_t firstLen = 0, secondLen = 0;
- UBool varT = FALSE; UBool top_ = TRUE;
- uint16_t specs = 0;
- UBool startOfRules = TRUE;
- UColTokenParser src;
- UColOptionSet opts;
-
- UChar first[256];
- UChar second[256];
- UChar *rulesCopy = NULL;
-
- uint32_t UCAdiff = 0;
- uint32_t Windiff = 1;
- UParseError parseError;
-
- (void)top_; /* Suppress set but not used warnings. */
- (void)varT;
- (void)secondLen;
- (void)prefixLen;
- (void)prefixOffset;
-
- uprv_memset(&src, 0, sizeof(UColTokenParser));
- src.opts = &opts;
-
- rules = ucol_getRules(coll, &ruleLen);
-
- /*printOutRules(rules);*/
-
- if(U_SUCCESS(*status) && ruleLen > 0) {
- rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
- uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
- src.current = src.source = rulesCopy;
- src.end = rulesCopy+ruleLen;
- src.extraCurrent = src.end;
- src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
- *first = *second = 0;
-
- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
- while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
- strength = src.parsedToken.strength;
- chOffset = src.parsedToken.charsOffset;
- chLen = src.parsedToken.charsLen;
- exOffset = src.parsedToken.extensionOffset;
- exLen = src.parsedToken.extensionLen;
- prefixOffset = src.parsedToken.prefixOffset;
- prefixLen = src.parsedToken.prefixLen;
- specs = src.parsedToken.flags;
-
- startOfRules = FALSE;
- varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
- top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
-
- u_strncpy(second,src.source+chOffset, chLen);
- second[chLen] = 0;
- secondLen = chLen;
-
- if(exLen > 0) {
- u_strncat(first, src.source+exOffset, exLen);
- first[firstLen+exLen] = 0;
- firstLen += exLen;
- }
-
- if(strength != UCOL_TOK_RESET) {
- if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
- UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
- /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
- }
- }
-
-
- firstLen = chLen;
- u_strcpy(first, second);
-
- }
- if(UCAdiff != 0 && Windiff != 0) {
- log_verbose("\n");
- }
- if(UCAdiff == 0) {
- log_verbose("No immediate difference with %s!\n", refName);
- }
- if(Windiff == 0) {
- log_verbose("No immediate difference with Win32!\n");
- }
- uprv_free(src.source);
- uprv_free(src.reorderCodes);
- }
-}
-
-/*
- * Takes two CEs (lead and continuation) and
- * compares them as CEs should be compared:
- * primary vs. primary, secondary vs. secondary
- * tertiary vs. tertiary
- */
-static int32_t compareCEs(uint32_t s1, uint32_t s2,
- uint32_t t1, uint32_t t2) {
- uint32_t s = 0, t = 0;
- if(s1 == t1 && s2 == t2) {
- return 0;
- }
- s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
- t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
- if(s < t) {
- return -1;
- } else if(s > t) {
- return 1;
- } else {
- s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
- t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
- if(s < t) {
- return -1;
- } else if(s > t) {
- return 1;
- } else {
- s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
- t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
- if(s < t) {
- return -1;
- } else {
- return 1;
- }
- }
- }
-}
-
-typedef struct {
- uint32_t startCE;
- uint32_t startContCE;
- uint32_t limitCE;
- uint32_t limitContCE;
-} indirectBoundaries;
-
-/* these values are used for finding CE values for indirect positioning. */
-/* Indirect positioning is a mechanism for allowing resets on symbolic */
-/* values. It only works for resets and you cannot tailor indirect names */
-/* An indirect name can define either an anchor point or a range. An */
-/* anchor point behaves in exactly the same way as a code point in reset */
-/* would, except that it cannot be tailored. A range (we currently only */
-/* know of the [top] range) will explicitly set the upper bound for */
-/* generated CEs, thus allowing for better control over how many CEs can */
-/* be squeezed between in the range without performance penalty. */
-/* In that respect, we use [top] for tailoring of locales that use CJK */
-/* characters. Other indirect values are currently a pure convenience, */
-/* they can be used to assure that the CEs will be always positioned in */
-/* the same place relative to a point with known properties (e.g. first */
-/* primary ignorable). */
-static indirectBoundaries ucolIndirectBoundaries[15];
-static UBool indirectBoundariesSet = FALSE;
-static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
- /* Set values for the top - TODO: once we have values for all the indirects, we are going */
- /* to initalize here. */
- ucolIndirectBoundaries[indexR].startCE = start[0];
- ucolIndirectBoundaries[indexR].startContCE = start[1];
- if(end) {
- ucolIndirectBoundaries[indexR].limitCE = end[0];
- ucolIndirectBoundaries[indexR].limitContCE = end[1];
- } else {
- ucolIndirectBoundaries[indexR].limitCE = 0;
- ucolIndirectBoundaries[indexR].limitContCE = 0;
- }
-}
-
-static void testCEs(UCollator *coll, UErrorCode *status) {
- const UChar *rules = NULL, *current = NULL;
- int32_t ruleLen = 0;
-
- uint32_t strength = 0;
- uint32_t maxStrength = UCOL_IDENTICAL;
- uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
- uint32_t lastCE;
- uint32_t lastContCE;
-
- int32_t result = 0;
- uint32_t chOffset = 0; uint32_t chLen = 0;
- uint32_t exOffset = 0; uint32_t exLen = 0;
- uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
- uint32_t oldOffset = 0;
-
- /* uint32_t rExpsLen = 0; */
- /* uint32_t firstLen = 0; */
- uint16_t specs = 0;
- UBool varT = FALSE; UBool top_ = TRUE;
- UBool startOfRules = TRUE;
- UBool before = FALSE;
- UColTokenParser src;
- UColOptionSet opts;
- UParseError parseError;
- UChar *rulesCopy = NULL;
- collIterate *c = uprv_new_collIterate(status);
- UCAConstants *consts = NULL;
- uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
- UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
- const char *colLoc;
- UCollator *UCA = ucol_open("root", status);
-
- (void)varT; /* Suppress set but not used warnings. */
- (void)prefixLen;
- (void)prefixOffset;
- (void)exLen;
- (void)exOffset;
-
- if (U_FAILURE(*status)) {
- log_err("Could not open root collator %s\n", u_errorName(*status));
- uprv_delete_collIterate(c);
- return;
- }
-
- colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
- if (U_FAILURE(*status)) {
- log_err("Could not get collator name: %s\n", u_errorName(*status));
- ucol_close(UCA);
- uprv_delete_collIterate(c);
- return;
- }
-
- uprv_memset(&src, 0, sizeof(UColTokenParser));
-
- consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
- UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
- /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
- UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
- UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
-
- baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
-
- src.opts = &opts;
-
- rules = ucol_getRules(coll, &ruleLen);
-
- src.invUCA = ucol_initInverseUCA(status);
-
- if(indirectBoundariesSet == FALSE) {
- /* UCOL_RESET_TOP_VALUE */
- setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
- /* UCOL_FIRST_PRIMARY_IGNORABLE */
- setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
- /* UCOL_LAST_PRIMARY_IGNORABLE */
- setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
- /* UCOL_FIRST_SECONDARY_IGNORABLE */
- setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
- /* UCOL_LAST_SECONDARY_IGNORABLE */
- setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
- /* UCOL_FIRST_TERTIARY_IGNORABLE */
- setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
- /* UCOL_LAST_TERTIARY_IGNORABLE */
- setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
- /* UCOL_FIRST_VARIABLE */
- setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
- /* UCOL_LAST_VARIABLE */
- setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
- /* UCOL_FIRST_NON_VARIABLE */
- setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
- /* UCOL_LAST_NON_VARIABLE */
- setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
- /* UCOL_FIRST_IMPLICIT */
- setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
- /* UCOL_LAST_IMPLICIT */
- setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
- /* UCOL_FIRST_TRAILING */
- setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
- /* UCOL_LAST_TRAILING */
- setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
- ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
- indirectBoundariesSet = TRUE;
- }
-
-
- if(U_SUCCESS(*status) && ruleLen > 0) {
- rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
- uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
- src.current = src.source = rulesCopy;
- src.end = rulesCopy+ruleLen;
- src.extraCurrent = src.end;
- src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
-
- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
- while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
- strength = src.parsedToken.strength;
- chOffset = src.parsedToken.charsOffset;
- chLen = src.parsedToken.charsLen;
- exOffset = src.parsedToken.extensionOffset;
- exLen = src.parsedToken.extensionLen;
- prefixOffset = src.parsedToken.prefixOffset;
- prefixLen = src.parsedToken.prefixLen;
- specs = src.parsedToken.flags;
-
- startOfRules = FALSE;
- varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
- top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
-
- uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
-
- currCE = ucol_getNextCE(coll, c, status);
- if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
- log_verbose("Thai prevowel detected. Will pick next CE\n");
- currCE = ucol_getNextCE(coll, c, status);
- }
-
- currContCE = ucol_getNextCE(coll, c, status);
- if(!isContinuation(currContCE)) {
- currContCE = 0;
- }
-
- /* we need to repack CEs here */
-
- if(strength == UCOL_TOK_RESET) {
- before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
- if(top_ == TRUE) {
- int32_t tokenIndex = src.parsedToken.indirectIndex;
-
- nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
- nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
- } else {
- nextCE = baseCE = currCE;
- nextContCE = baseContCE = currContCE;
- }
- maxStrength = UCOL_IDENTICAL;
- } else {
- if(strength < maxStrength) {
- maxStrength = strength;
- if(baseCE == UCOL_RESET_TOP_VALUE) {
- log_verbose("Resetting to [top]\n");
- nextCE = UCOL_NEXT_TOP_VALUE;
- nextContCE = UCOL_NEXT_TOP_CONT;
- } else {
- result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
- }
- if(result < 0) {
- if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
- log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
- return;
- } else {
- log_err("%s: couldn't find the CE\n", colLoc);
- return;
- }
- }
- }
-
- currCE &= 0xFFFFFF3F;
- currContCE &= 0xFFFFFFBF;
-
- if(maxStrength == UCOL_IDENTICAL) {
- if(baseCE != currCE || baseContCE != currContCE) {
- log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc);
- }
- } else {
- if(strength == UCOL_IDENTICAL) {
- if(lastCE != currCE || lastContCE != currContCE) {
- log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc);
- }
- } else {
- if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
- /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
- log_err("%s: current CE is not less than base CE\n", colLoc);
- }
- if(!before) {
- if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
- /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
- log_err("%s: sequence of generated CEs is broken\n", colLoc);
- }
- } else {
- before = FALSE;
- if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
- /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
- log_err("%s: sequence of generated CEs is broken\n", colLoc);
- }
- }
- }
- }
-
- }
-
- oldOffset = chOffset;
- lastCE = currCE & 0xFFFFFF3F;
- lastContCE = currContCE & 0xFFFFFFBF;
- }
- uprv_free(src.source);
- uprv_free(src.reorderCodes);
- }
- ucol_close(UCA);
- uprv_delete_collIterate(c);
-}
-
-#if 0
-/* these locales are now picked from index RB */
-static const char* localesToTest[] = {
-"ar", "bg", "ca", "cs", "da",
-"el", "en_BE", "en_US_POSIX",
-"es", "et", "fi", "fr", "hi",
-"hr", "hu", "is", "iw", "ja",
-"ko", "lt", "lv", "mk", "mt",
-"nb", "nn", "nn_NO", "pl", "ro",
-"ru", "sh", "sk", "sl", "sq",
-"sr", "sv", "th", "tr", "uk",
-"vi", "zh", "zh_TW"
-};
-#endif
-
-static const char* rulesToTest[] = {
- /* Funky fa rule */
- "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
- /*"& Z < p, P",*/
- /* Cui Mins rules */
- "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
- "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
- "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
- "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
- "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
- "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
- "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
-};
-
-
-static void TestCollations(void) {
- int32_t noOfLoc = uloc_countAvailable();
- int32_t i = 0, j = 0;
-
- UErrorCode status = U_ZERO_ERROR;
- char cName[256];
- UChar name[256];
- int32_t nameSize;
-
-
- const char *locName = NULL;
- UCollator *coll = NULL;
- UCollator *UCA = ucol_open("", &status);
- UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
- if (U_FAILURE(status)) {
- log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
- return;
- }
- ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
-
- for(i = 0; i<noOfLoc; i++) {
- status = U_ZERO_ERROR;
- locName = uloc_getAvailable(i);
- if(uprv_strcmp("ja", locName) == 0) {
- log_verbose("Don't know how to test prefixes\n");
- continue;
- }
- if(hasCollationElements(locName)) {
- nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
- for(j = 0; j<nameSize; j++) {
- cName[j] = (char)name[j];
- }
- cName[nameSize] = 0;
- log_verbose("\nTesting locale %s (%s)\n", locName, cName);
- coll = ucol_open(locName, &status);
- if(U_SUCCESS(status)) {
- testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
- ucol_close(coll);
- } else {
- log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
- status = U_ZERO_ERROR;
- }
- }
- }
- ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
- ucol_close(UCA);
-}
-
-static void RamsRulesTest(void) {
- UErrorCode status = U_ZERO_ERROR;
- int32_t i = 0;
- UCollator *coll = NULL;
- UChar rule[2048];
- uint32_t ruleLen;
- int32_t noOfLoc = uloc_countAvailable();
- const char *locName = NULL;
-
- log_verbose("RamsRulesTest\n");
-
- if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
- /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
- return;
- }
-
- for(i = 0; i<noOfLoc; i++) {
- locName = uloc_getAvailable(i);
- if(hasCollationElements(locName)) {
- if (uprv_strcmp("ja", locName)==0) {
- log_verbose("Don't know how to test Japanese because of prefixes\n");
- continue;
- }
- if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
- log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
- continue;
- }
- if (uprv_strcmp("bn", locName)==0 ||
- uprv_strcmp("bs", locName)==0 || /* Add due to import per cldrbug 5647 */
- uprv_strcmp("bs_Cyrl", locName)==0 || /* Add due to import per cldrbug 5647 */
- uprv_strcmp("en_US_POSIX", locName)==0 ||
- uprv_strcmp("fa", locName)==0 || /* Add in #10222 with CLDR 24 integration */
- uprv_strcmp("fa_AF", locName)==0 || /* Add due to import per cldrbug 5647 */
- uprv_strcmp("gl", locName)==0 || /* Add due to import per cldrbug 5647 */
- uprv_strcmp("gl_ES", locName)==0 || /* Add due to import per cldrbug 5647 */
- uprv_strcmp("he", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
- uprv_strcmp("he_IL", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
- uprv_strcmp("km", locName)==0 ||
- uprv_strcmp("km_KH", locName)==0 ||
- uprv_strcmp("my", locName)==0 ||
- uprv_strcmp("ps", locName)==0 || /* Add in #10222 with CLDR 24 integration */
- uprv_strcmp("si", locName)==0 ||
- uprv_strcmp("si_LK", locName)==0 ||
- uprv_strcmp("sr_Latn", locName)==0 || /* Add due to import per cldrbug 5647 */
- uprv_strcmp("th", locName)==0 ||
- uprv_strcmp("th_TH", locName)==0 ||
- uprv_strcmp("zh", locName)==0 ||
- uprv_strcmp("zh_Hant", locName)==0
- ) {
- if(log_knownIssue("6040", NULL)) {
- log_verbose("Can't test %s - TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
- continue;
- }
- }
- log_verbose("Testing locale %s\n", locName);
- status = U_ZERO_ERROR;
- coll = ucol_open(locName, &status);
- if(U_SUCCESS(status)) {
- if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
- if(coll->image->jamoSpecial == TRUE) {
- log_err("%s has special JAMOs\n", locName);
- }
- ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
- testCollator(coll, &status);
- testCEs(coll, &status);
- } else {
- log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
- }
- ucol_close(coll);
- } else {
- log_err("Could not open %s: %s\n", locName, u_errorName(status));
- }
- }
- }
-
- for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
- log_verbose("Testing rule: %s\n", rulesToTest[i]);
- ruleLen = u_unescape(rulesToTest[i], rule, 2048);
- status = U_ZERO_ERROR;
- coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
- if(U_SUCCESS(status)) {
- testCollator(coll, &status);
- testCEs(coll, &status);
- ucol_close(coll);
- } else {
- log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
- }
- }
-
-}
-
-static void IsTailoredTest(void) {
- UErrorCode status = U_ZERO_ERROR;
- uint32_t i = 0;
- UCollator *coll = NULL;
- UChar rule[2048];
- UChar tailored[2048];
- UChar notTailored[2048];
- uint32_t ruleLen, tailoredLen, notTailoredLen;
-
- log_verbose("IsTailoredTest\n");
-
- u_uastrcpy(rule, "&Z < A, B, C;c < d");
- ruleLen = u_strlen(rule);
-
- u_uastrcpy(tailored, "ABCcd");
- tailoredLen = u_strlen(tailored);
-
- u_uastrcpy(notTailored, "ZabD");
- notTailoredLen = u_strlen(notTailored);
-
- coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
- if(U_SUCCESS(status)) {
- for(i = 0; i<tailoredLen; i++) {
- if(!ucol_isTailored(coll, tailored[i], &status)) {
- log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
- }
- }
- for(i = 0; i<notTailoredLen; i++) {
- if(ucol_isTailored(coll, notTailored[i], &status)) {
- log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
- }
- }
- ucol_close(coll);
- }
- else {
- log_err_status(status, "Can't tailor rules\n");
- }
- /* Code coverage */
- status = U_ZERO_ERROR;
- coll = ucol_open("ja", &status);
- if(!ucol_isTailored(coll, 0x4E9C, &status)) {
- log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
- }
- ucol_close(coll);
-}
-
-
const static char chTest[][20] = {
"c",
"C",
@@ -1573,6 +461,7 @@ static void TestChMove(void) {
+/*
const static char impTest[][20] = {
"\\u4e00",
"a",
@@ -1581,6 +470,7 @@ const static char impTest[][20] = {
"B",
"\\u4e01"
};
+*/
static void TestImplicitTailoring(void) {
@@ -1589,7 +479,12 @@ static void TestImplicitTailoring(void) {
const char *data[10];
const uint32_t len;
} tests[] = {
- { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
+ {
+ /* Tailor b and c before U+4E00. */
+ "&[before 1]\\u4e00 < b < c "
+ /* Now, before U+4E00 is c; put d and e after that. */
+ "&[before 1]\\u4e00 < d < e",
+ { "b", "c", "d", "e", "\\u4e00"}, 5 },
{ "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
{ "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
{ "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
@@ -1934,205 +829,6 @@ static void TestJ815(void) {
}
-/*
-"& a < b < c < d& r < c", "& a < b < d& r < c",
-"& a < b < c < d& c < m", "& a < b < c < m < d",
-"& a < b < c < d& a < m", "& a < m < b < c < d",
-"& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
-"& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
-"& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
-"& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
-"& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
-"& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
-*/
-static void TestRedundantRules(void) {
- int32_t i;
-
- static const struct {
- const char *rules;
- const char *expectedRules;
- const char *testdata[8];
- uint32_t testdatalen;
- } tests[] = {
- /* this test conflicts with positioning of CODAN placeholder */
- /*{
- "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
- "&\\u2089<<<x",
- {"\\u2089", "x"}, 2
- }, */
- /* this test conflicts with the [before x] syntax tightening */
- /*{
- "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
- "&\\u0252<<<x",
- {"\\u0252", "x"}, 2
- }, */
- /* this test conflicts with the [before x] syntax tightening */
- /*{
- "& a < b <<< c << d <<< e& [before 1] e <<< x",
- "& a <<< x < b <<< c << d <<< e",
- {"a", "x", "b", "c", "d", "e"}, 6
- }, */
- {
- "& a < b < c < d& [before 1] c < m",
- "& a < b < m < c < d",
- {"a", "b", "m", "c", "d"}, 5
- },
- {
- "& a < b <<< c << d <<< e& [before 3] e <<< x",
- "& a < b <<< c << d <<< x <<< e",
- {"a", "b", "c", "d", "x", "e"}, 6
- },
- /* this test conflicts with the [before x] syntax tightening */
- /* {
- "& a < b <<< c << d <<< e& [before 2] e <<< x",
- "& a < b <<< c <<< x << d <<< e",
- {"a", "b", "c", "x", "d", "e"},, 6
- }, */
- {
- "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
- "& a < b <<< c << d <<< e <<< f < x < g",
- {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
- },
- {
- "& a <<< b << c < d& a < m",
- "& a <<< b << c < m < d",
- {"a", "b", "c", "m", "d"}, 5
- },
- {
- "&a<b<<b\\u0301 &z<b",
- "&a<b\\u0301 &z<b",
- {"a", "b\\u0301", "z", "b"}, 4
- },
- {
- "&z<m<<<q<<<m",
- "&z<q<<<m",
- {"z", "q", "m"},3
- },
- {
- "&z<<<m<q<<<m",
- "&z<q<<<m",
- {"z", "q", "m"}, 3
- },
- {
- "& a < b < c < d& r < c",
- "& a < b < d& r < c",
- {"a", "b", "d"}, 3
- },
- {
- "& a < b < c < d& r < c",
- "& a < b < d& r < c",
- {"r", "c"}, 2
- },
- {
- "& a < b < c < d& c < m",
- "& a < b < c < m < d",
- {"a", "b", "c", "m", "d"}, 5
- },
- {
- "& a < b < c < d& a < m",
- "& a < m < b < c < d",
- {"a", "m", "b", "c", "d"}, 5
- }
- };
-
-
- UCollator *credundant = NULL;
- UCollator *cresulting = NULL;
- UErrorCode status = U_ZERO_ERROR;
- UChar rlz[2048] = { 0 };
- uint32_t rlen = 0;
-
- for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
- log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
- rlen = u_unescape(tests[i].rules, rlz, 2048);
-
- credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
- if(status == U_FILE_ACCESS_ERROR) {
- log_data_err("Is your data around?\n");
- return;
- } else if(U_FAILURE(status)) {
- log_err("Error opening collator\n");
- return;
- }
-
- rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
- cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
-
- testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
-
- ucol_close(credundant);
- ucol_close(cresulting);
-
- log_verbose("testing using data\n");
-
- genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
- }
-
-}
-
-static void TestExpansionSyntax(void) {
- int32_t i;
-
- const static char *rules[] = {
- "&AE <<< a << b <<< c &d <<< f",
- "&AE <<< a <<< b << c << d < e < f <<< g",
- "&AE <<< B <<< C / D <<< F"
- };
-
- const static char *expectedRules[] = {
- "&A <<< a / E << b / E <<< c /E &d <<< f",
- "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
- "&A <<< B / E <<< C / ED <<< F / E"
- };
-
- const static char *testdata[][8] = {
- {"AE", "a", "b", "c"},
- {"AE", "a", "b", "c", "d", "e", "f", "g"},
- {"AE", "B", "C"} /* / ED <<< F / E"},*/
- };
-
- const static uint32_t testdatalen[] = {
- 4,
- 8,
- 3
- };
-
-
-
- UCollator *credundant = NULL;
- UCollator *cresulting = NULL;
- UErrorCode status = U_ZERO_ERROR;
- UChar rlz[2048] = { 0 };
- uint32_t rlen = 0;
-
- for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
- log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
- rlen = u_unescape(rules[i], rlz, 2048);
-
- credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
- if(status == U_FILE_ACCESS_ERROR) {
- log_data_err("Is your data around?\n");
- return;
- } else if(U_FAILURE(status)) {
- log_err("Error opening collator\n");
- return;
- }
- rlen = u_unescape(expectedRules[i], rlz, 2048);
- cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
-
- /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
- /* as a hard error test, but only in information mode */
- testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
-
- ucol_close(credundant);
- ucol_close(cresulting);
-
- log_verbose("testing using data\n");
-
- genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
- }
-}
-
static void TestCase(void)
{
const static UChar gRules[MAX_TOKEN_LEN] =
@@ -2246,13 +942,13 @@ static void TestCase(void)
};
log_verbose("mixed case test\n");
log_verbose("lower first, case level off\n");
- genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
+ genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
log_verbose("upper first, case level off\n");
- genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
+ genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
log_verbose("lower first, case level on\n");
- genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
+ genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
log_verbose("upper first, case level on\n");
- genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
+ genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
}
}
@@ -2560,25 +1256,32 @@ static void TestHangulTailoring(void) {
log_err("Unable to open collator with rules %s\n", rules);
}
- log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
- ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */
- genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
-
ucol_close(coll);
log_verbose("Using ko__LOTUS locale\n");
genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
}
+/*
+ * The secondary/tertiary compression middle byte
+ * as used by the current implementation.
+ * Subject to change as the sort key compression changes.
+ * See class CollationKeys.
+ *
+ * Each value is the midpoint of the byte range noted next to it.
+ * TestCompressOverlap() uses them as thresholds: in its top-down checks
+ * no compressed byte of the level may be below the middle, and in its
+ * bottom-up checks none may be above it.
+ * SEC_COMMON_MIDDLE is compared against secondary-level bytes,
+ * TER_ONLY_COMMON_MIDDLE against tertiary-level bytes.
+ */
+enum {
+SEC_COMMON_MIDDLE = 0x25, /* range 05..45 */
+TER_ONLY_COMMON_MIDDLE = 0x65 /* range 05..C5 */
+};
+
static void TestCompressOverlap(void) {
UChar secstr[150];
UChar tertstr[150];
UErrorCode status = U_ZERO_ERROR;
UCollator *coll;
- char result[200];
+ uint8_t result[500];
uint32_t resultlen;
int count = 0;
- char *tempptr;
+ uint8_t *tempptr;
coll = ucol_open("", &status);
@@ -2598,29 +1301,29 @@ static void TestCompressOverlap(void) {
/* no compression secstr should have 150 secondary bytes, tertstr should
have 150 tertiary bytes.
- with correct overlapping compression, secstr should have 4 secondary
- bytes, tertstr should have > 2 tertiary bytes */
- resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
+ with correct compression, secstr should have 6 secondary
+ bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
+ resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
(void)resultlen; /* Suppress set but not used warning. */
- tempptr = uprv_strchr(result, 1) + 1;
+ tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
while (*(tempptr + 1) != 1) {
/* the last secondary collation element is not checked since it is not
part of the compression */
- if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
- log_err("Secondary compression overlapped\n");
+ if (*tempptr < SEC_COMMON_MIDDLE) {
+ log_err("Secondary top down compression overlapped\n");
}
tempptr ++;
}
/* tertiary top/bottom/common for en_US is similar to the secondary
top/bottom/common */
- resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
- tempptr = uprv_strrchr(result, 1) + 1;
+ resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
+ tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
while (*(tempptr + 1) != 0) {
/* the last tertiary collation element is not checked since it is not
part of the compression */
- if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
- log_err("Tertiary compression overlapped\n");
+ if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
+ log_err("Tertiary top down compression overlapped\n");
}
tempptr ++;
}
@@ -2628,26 +1331,26 @@ static void TestCompressOverlap(void) {
/* bottom up compression ------------------------------------- */
secstr[count] = 0;
tertstr[count] = 0;
- resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
- tempptr = uprv_strchr(result, 1) + 1;
+ resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
+ tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
while (*(tempptr + 1) != 1) {
/* the last secondary collation element is not checked since it is not
part of the compression */
- if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
- log_err("Secondary compression overlapped\n");
+ if (*tempptr > SEC_COMMON_MIDDLE) {
+ log_err("Secondary bottom up compression overlapped\n");
}
tempptr ++;
}
/* tertiary top/bottom/common for en_US is similar to the secondary
top/bottom/common */
- resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
- tempptr = uprv_strrchr(result, 1) + 1;
+ resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
+ tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
while (*(tempptr + 1) != 0) {
/* the last tertiary collation element is not checked since it is not
part of the compression */
- if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
- log_err("Tertiary compression overlapped\n");
+ if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
+ log_err("Tertiary bottom up compression overlapped\n");
}
tempptr ++;
}
@@ -2707,6 +1410,13 @@ static void TestContraction(void) {
{0x0063 /* 'c' */, 0x0068 /* 'h' */},
{0x0063 /* 'c' */, 0x006C /* 'l' */}
};
+#if 0
+ /*
+ * These pairs of rule strings are not guaranteed to yield the very same mappings.
+ * In fact, LDML 24 recommends an improved way of creating mappings
+ * which always yields different mappings for such pairs. See
+ * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
+ */
const static char *testrules3[] = {
"&z < xyz &xyzw << B",
"&z < xyz &xyz << B / w",
@@ -2717,6 +1427,7 @@ static void TestContraction(void) {
"&a\\ud800\\udc00m << B",
"&a << B / \\ud800\\udc00m",
};
+#endif
UErrorCode status = U_ZERO_ERROR;
UCollator *coll;
@@ -2782,8 +1493,9 @@ static void TestContraction(void) {
return;
}
ucol_close(coll);
-
+#if 0 /* see above */
for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
+ log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
UCollator *coll1,
*coll2;
UCollationElements *iter1,
@@ -2810,8 +1522,11 @@ static void TestContraction(void) {
return;
}
while (ce != UCOL_NULLORDER) {
- if (ce != (uint32_t)ucol_next(iter2, &status)) {
- log_err("CEs does not match\n");
+ uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
+ if (ce == ce2) {
+ log_verbose("CEs match: %08x\n", ce);
+ } else {
+ log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
return;
}
ce = ucol_next(iter1, &status);
@@ -2829,11 +1544,23 @@ static void TestContraction(void) {
ucol_close(coll1);
ucol_close(coll2);
}
+#endif
}
static void TestExpansion(void) {
const static char *testrules[] = {
+#if 0
+ /*
+ * This seems to have tested that M was not mapped to an expansion.
+ * I believe the old builder just did that because it computed the extension CEs
+ * at the very end, which was a bug.
+ * Among other problems, it violated the core tailoring principle
+ * by making an earlier rule depend on a later one.
+ * And, of course, if M did not get an expansion, then it was primary different from K,
+ * unlike what the rule &K<<M says.
+ */
"&J << K / B & K << M",
+#endif
"&J << K / B << M"
};
const static UChar testdata[][3] = {
@@ -2983,207 +1710,81 @@ static void TestBocsuCoverage(void) {
static void TestVariableTopSetting(void) {
UErrorCode status = U_ZERO_ERROR;
- const UChar *current = NULL;
uint32_t varTopOriginal = 0, varTop1, varTop2;
UCollator *coll = ucol_open("", &status);
if(U_SUCCESS(status)) {
- uint32_t strength = 0;
- uint16_t specs = 0;
- uint32_t chOffset = 0;
- uint32_t chLen = 0;
- uint32_t exOffset = 0;
- uint32_t exLen = 0;
- uint32_t oldChOffset = 0;
- uint32_t oldChLen = 0;
- uint32_t oldExOffset = 0;
- uint32_t oldExLen = 0;
- uint32_t prefixOffset = 0;
- uint32_t prefixLen = 0;
-
- UBool startOfRules = TRUE;
- UColTokenParser src;
- UColOptionSet opts;
-
- UChar *rulesCopy = NULL;
- uint32_t rulesLen;
-
- UCollationResult result;
+ static const UChar nul = 0;
+ static const UChar space = 0x20;
+ static const UChar dot = 0x2e; /* punctuation */
+ static const UChar degree = 0xb0; /* symbol */
+ static const UChar dollar = 0x24; /* currency symbol */
+ static const UChar zero = 0x30; /* digit */
- UChar first[256] = { 0 };
- UChar second[256] = { 0 };
- UParseError parseError;
- int32_t myQ = getTestOption(QUICK_OPTION);
-
- (void)prefixLen; /* Suppress set but not used warnings. */
- (void)prefixOffset;
- (void)specs;
-
- uprv_memset(&src, 0, sizeof(UColTokenParser));
-
- src.opts = &opts;
+ varTopOriginal = ucol_getVariableTop(coll, &status);
+ log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
+ ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
- if(getTestOption(QUICK_OPTION) <= 0) {
- setTestOption(QUICK_OPTION, 1);
+ varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
+ varTop2 = ucol_getVariableTop(coll, &status);
+ log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
+ if(U_FAILURE(status) || varTop1 != varTop2 ||
+ !ucol_equal(coll, &nul, 0, &space, 1) ||
+ ucol_equal(coll, &nul, 0, &dot, 1) ||
+ ucol_equal(coll, &nul, 0, &degree, 1) ||
+ ucol_equal(coll, &nul, 0, &dollar, 1) ||
+ ucol_equal(coll, &nul, 0, &zero, 1) ||
+ ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
+ log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
}
- /* this test will fail when normalization is turned on */
- /* therefore we always turn off exhaustive mode for it */
- { /* QUICK > 0*/
- log_verbose("Slide variable top over UCARules\n");
- rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
- rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
- rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
-
- if(U_SUCCESS(status) && rulesLen > 0) {
- ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
- src.current = src.source = rulesCopy;
- src.end = rulesCopy+rulesLen;
- src.extraCurrent = src.end;
- src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
-
- /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
- the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
- while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
- strength = src.parsedToken.strength;
- chOffset = src.parsedToken.charsOffset;
- chLen = src.parsedToken.charsLen;
- exOffset = src.parsedToken.extensionOffset;
- exLen = src.parsedToken.extensionLen;
- prefixOffset = src.parsedToken.prefixOffset;
- prefixLen = src.parsedToken.prefixLen;
- specs = src.parsedToken.flags;
-
- startOfRules = FALSE;
- {
- log_verbose("%04X %d ", *(src.source+chOffset), chLen);
- }
- if(strength == UCOL_PRIMARY) {
- status = U_ZERO_ERROR;
- varTopOriginal = ucol_getVariableTop(coll, &status);
- varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
- if(U_FAILURE(status)) {
- char buffer[256];
- char *buf = buffer;
- uint32_t i = 0, j;
- uint32_t CE = UCOL_NO_MORE_CES;
-
- /* before we start screaming, let's see if there is a problem with the rules */
- UErrorCode collIterateStatus = U_ZERO_ERROR;
- collIterate *s = uprv_new_collIterate(&collIterateStatus);
- uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
-
- CE = ucol_getNextCE(coll, s, &status);
- (void)CE; /* Suppress set but not used warning. */
-
- for(i = 0; i < oldChLen; i++) {
- j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
- buf += j;
- }
- if(status == U_PRIMARY_TOO_LONG_ERROR) {
- log_verbose("= Expected failure for %s =", buffer);
- } else {
- if(uprv_collIterateAtEnd(s)) {
- log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
- oldChOffset, u_errorName(status), buffer);
- } else {
- log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
- buffer);
- }
- }
- uprv_delete_collIterate(s);
- }
- varTop2 = ucol_getVariableTop(coll, &status);
- if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
- log_err("cannot retrieve set varTop value!\n");
- continue;
- }
+ varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
+ varTop2 = ucol_getVariableTop(coll, &status);
+ log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
+ if(U_FAILURE(status) || varTop1 != varTop2 ||
+ !ucol_equal(coll, &nul, 0, &space, 1) ||
+ !ucol_equal(coll, &nul, 0, &dot, 1) ||
+ ucol_equal(coll, &nul, 0, &degree, 1) ||
+ ucol_equal(coll, &nul, 0, &dollar, 1) ||
+ ucol_equal(coll, &nul, 0, &zero, 1) ||
+ ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
+ log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
+ }
- if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
-
- u_strncpy(first, src.source+oldChOffset, oldChLen);
- u_strncpy(first+oldChLen, src.source+chOffset, chLen);
- u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
- first[2*oldChLen+chLen] = 0;
-
- if(oldExLen == 0) {
- u_strncpy(second, src.source+chOffset, chLen);
- second[chLen] = 0;
- } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
- u_strncpy(second, src.source+oldExOffset, oldExLen);
- u_strncpy(second+oldChLen, src.source+chOffset, chLen);
- u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
- second[2*oldExLen+chLen] = 0;
- }
- result = ucol_strcoll(coll, first, -1, second, -1);
- if(result == UCOL_EQUAL) {
- doTest(coll, first, second, UCOL_EQUAL);
- } else {
- log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
- }
- }
- }
- if(strength != UCOL_TOK_RESET) {
- oldChOffset = chOffset;
- oldChLen = chLen;
- oldExOffset = exOffset;
- oldExLen = exLen;
- }
- }
- status = U_ZERO_ERROR;
- }
- else {
- log_err("Unexpected failure getting rules %s\n", u_errorName(status));
- return;
- }
- if (U_FAILURE(status)) {
- log_err("Error parsing rules %s\n", u_errorName(status));
- return;
- }
- status = U_ZERO_ERROR;
+ varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
+ varTop2 = ucol_getVariableTop(coll, &status);
+ log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
+ if(U_FAILURE(status) || varTop1 != varTop2 ||
+ !ucol_equal(coll, &nul, 0, &space, 1) ||
+ !ucol_equal(coll, &nul, 0, &dot, 1) ||
+ !ucol_equal(coll, &nul, 0, &degree, 1) ||
+ ucol_equal(coll, &nul, 0, &dollar, 1) ||
+ ucol_equal(coll, &nul, 0, &zero, 1) ||
+ ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
+ log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
}
- setTestOption(QUICK_OPTION, myQ);
+ varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
+ varTop2 = ucol_getVariableTop(coll, &status);
+ log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
+ if(U_FAILURE(status) || varTop1 != varTop2 ||
+ !ucol_equal(coll, &nul, 0, &space, 1) ||
+ !ucol_equal(coll, &nul, 0, &dot, 1) ||
+ !ucol_equal(coll, &nul, 0, &degree, 1) ||
+ !ucol_equal(coll, &nul, 0, &dollar, 1) ||
+ ucol_equal(coll, &nul, 0, &zero, 1) ||
+ ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
+ log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
+ }
log_verbose("Testing setting variable top to contractions\n");
{
- UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
- int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
- while(*conts != 0) {
- /*
- * A continuation is NUL-terminated and NUL-padded
- * except if it has the maximum length.
- */
- int32_t contractionLength = maxUCAContractionLength;
- while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
- --contractionLength;
- }
- if(*(conts+1)==0) { /* pre-context */
- varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
- } else {
- varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
- }
- if(U_FAILURE(status)) {
- if(status == U_PRIMARY_TOO_LONG_ERROR) {
- /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
- * therefore it is not an error when it complains about them. */
- log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
- *conts, *(conts+1), *(conts+2));
- } else {
- log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
- *conts, *(conts+1), *(conts+2), u_errorName(status));
- }
- status = U_ZERO_ERROR;
- }
- conts+=maxUCAContractionLength;
- }
-
- status = U_ZERO_ERROR;
-
+ UChar first[4] = { 0 };
first[0] = 0x0040;
first[1] = 0x0050;
first[2] = 0x0000;
+ status = U_ZERO_ERROR;
ucol_setVariableTop(coll, first, -1, &status);
if(U_SUCCESS(status)) {
@@ -3203,21 +1804,110 @@ static void TestVariableTopSetting(void) {
log_verbose("Testing calling with error set\n");
status = U_INTERNAL_PROGRAM_ERROR;
- varTop1 = ucol_setVariableTop(coll, first, 1, &status);
+ varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
varTop2 = ucol_getVariableTop(coll, &status);
ucol_restoreVariableTop(coll, varTop2, &status);
- varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
+ varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
varTop2 = ucol_getVariableTop(NULL, &status);
ucol_restoreVariableTop(NULL, varTop2, &status);
if(status != U_INTERNAL_PROGRAM_ERROR) {
log_err("Bad reaction to passed error!\n");
}
- uprv_free(src.source);
ucol_close(coll);
} else {
log_data_err("Couldn't open UCA collator\n");
}
+}
+
+/*
+ * Exercises ucol_setMaxVariable()/ucol_getMaxVariable() on the root collator.
+ *
+ * maxVariable is raised step by step through space, punctuation, symbol
+ * and currency. After each step the test expects that
+ *   - the getter returns the value that was just set,
+ *   - each sample character of a group at or below the new maximum
+ *     compares equal to the empty string,
+ *   - each sample character of a higher group does not, and
+ *   - the sample of the new maximum still sorts before the sample of
+ *     the next group.
+ * Afterwards the original value is restored, and a setter call made while
+ * the status already holds a failure is expected to change neither the
+ * setting nor the status.
+ */
+static void TestMaxVariable(void) {
+    UErrorCode status = U_ZERO_ERROR;
+    UColReorderCode oldMax, max;
+    UCollator *coll;
+
+    /* nul is only ever passed with length 0, i.e. it stands for the empty string. */
+    static const UChar nul = 0;
+    static const UChar space = 0x20;
+    static const UChar dot = 0x2e; /* punctuation */
+    static const UChar degree = 0xb0; /* symbol */
+    static const UChar dollar = 0x24; /* currency symbol */
+    static const UChar zero = 0x30; /* digit */
+
+    coll = ucol_open("", &status);
+    if(U_FAILURE(status)) {
+        log_data_err("Couldn't open root collator\n");
+        return;
+    }
+
+    /* Remember the initial setting so that it can be restored at the end. */
+    oldMax = ucol_getMaxVariable(coll);
+    log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
+    /* All comparisons below are made with alternate handling set to shifted. */
+    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
+
+    /* maxVariable = space: only the space sample may equal the empty string. */
+    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
+    max = ucol_getMaxVariable(coll);
+    log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
+    if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
+            !ucol_equal(coll, &nul, 0, &space, 1) ||
+            ucol_equal(coll, &nul, 0, &dot, 1) ||
+            ucol_equal(coll, &nul, 0, &degree, 1) ||
+            ucol_equal(coll, &nul, 0, &dollar, 1) ||
+            ucol_equal(coll, &nul, 0, &zero, 1) ||
+            ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
+        log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
+    }
+
+    /* maxVariable = punctuation: space and dot may equal the empty string. */
+    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
+    max = ucol_getMaxVariable(coll);
+    log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
+    if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
+            !ucol_equal(coll, &nul, 0, &space, 1) ||
+            !ucol_equal(coll, &nul, 0, &dot, 1) ||
+            ucol_equal(coll, &nul, 0, &degree, 1) ||
+            ucol_equal(coll, &nul, 0, &dollar, 1) ||
+            ucol_equal(coll, &nul, 0, &zero, 1) ||
+            ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
+        log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
+    }
+
+    /* maxVariable = symbol: space, dot and degree may equal the empty string. */
+    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
+    max = ucol_getMaxVariable(coll);
+    log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
+    if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
+            !ucol_equal(coll, &nul, 0, &space, 1) ||
+            !ucol_equal(coll, &nul, 0, &dot, 1) ||
+            !ucol_equal(coll, &nul, 0, &degree, 1) ||
+            ucol_equal(coll, &nul, 0, &dollar, 1) ||
+            ucol_equal(coll, &nul, 0, &zero, 1) ||
+            ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
+        log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
+    }
+
+    /* maxVariable = currency: everything but the digit may equal the empty string. */
+    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
+    max = ucol_getMaxVariable(coll);
+    log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
+    if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
+            !ucol_equal(coll, &nul, 0, &space, 1) ||
+            !ucol_equal(coll, &nul, 0, &dot, 1) ||
+            !ucol_equal(coll, &nul, 0, &degree, 1) ||
+            !ucol_equal(coll, &nul, 0, &dollar, 1) ||
+            ucol_equal(coll, &nul, 0, &zero, 1) ||
+            ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
+        log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
+    }
+
+    log_verbose("Test restoring maxVariable\n");
+    /* Start from a clean status so that an earlier failure cannot block the restore. */
+    status = U_ZERO_ERROR;
+    ucol_setMaxVariable(coll, oldMax, &status);
+    if(oldMax != ucol_getMaxVariable(coll)) {
+        log_err("Couldn't restore old maxVariable\n");
+    }
+
+    log_verbose("Testing calling with error set\n");
+    /* With a failure already in status, the setter is expected to do nothing. */
+    status = U_INTERNAL_PROGRAM_ERROR;
+    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
+    max = ucol_getMaxVariable(coll);
+    if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
+        log_err("Bad reaction to passed error!\n");
+    }
+    ucol_close(coll);
}
static void TestNonChars(void) {
@@ -3702,6 +2392,8 @@ static void TestRuleOptions(void) {
const char *data[10];
const uint32_t len;
} tests[] = {
+#if 0
+ /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
/* - all befores here amount to zero */
{ "&[before 3][first tertiary ignorable]<<<a",
{ "\\u0000", "a"}, 2
@@ -3710,25 +2402,35 @@ static void TestRuleOptions(void) {
{ "&[before 3][last tertiary ignorable]<<<a",
{ "\\u0000", "a"}, 2
}, /* you cannot go before last tertiary ignorable */
-
+#endif
+ /*
+ * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
+ * and it *is* possible to "go before" that.
+ */
{ "&[before 3][first secondary ignorable]<<<a",
{ "\\u0000", "a"}, 2
- }, /* you cannot go before first secondary ignorable */
+ },
{ "&[before 3][last secondary ignorable]<<<a",
{ "\\u0000", "a"}, 2
- }, /* you cannot go before first secondary ignorable */
+ },
/* 'normal' befores */
- { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
+ /*
+ * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
+ * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
+ * because there is no tailoring space before that boundary.
+ * Made the tests work by tailoring to a space instead.
+ */
+ { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */
{ "c", "b", "\\u0332", "a" }, 4
},
/* we don't have a code point that corresponds to
* the last primary ignorable
*/
- { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
+ { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */
{ "\\u0332", "\\u20e3", "c", "b", "a" }, 5
},
@@ -3754,14 +2456,14 @@ static void TestRuleOptions(void) {
"&[first implicit]<a",
{ "b", "\\u4e00", "a", "\\u4e01"}, 4
},
-
+#if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
{ "&[before 1][last implicit]<b"
"&[last implicit]<a",
{ "b", "\\U0010FFFD", "a" }, 3
},
-
+#endif
{ "&[last variable]<z"
- "&[last primary ignorable]<x"
+ "&' '<x" /* was &[last primary ignorable]<x, see above */
"&[last secondary ignorable]<<y"
"&[last tertiary ignorable]<<<w"
"&[top]<u",
@@ -4007,7 +2709,7 @@ static void TestPartialSortKeyTermination(void) {
"\\udc00\\ud800\\ud800"
};
- int32_t i = sizeof(UCollator);
+ int32_t i;
UErrorCode status = U_ZERO_ERROR;
@@ -4081,7 +2783,7 @@ static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCo
errorNo++;
}
ucol_close(target);
- if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
+ if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
target = ucol_safeClone(source, NULL, NULL, &status);
if(U_FAILURE(status)) {
log_err("Error creating clone\n");
@@ -4116,7 +2818,8 @@ static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCo
errorNo++;
return errorNo;
}
- if(!ucol_equals(source, target)) {
+ /* Note: The tailoring rule string is an optional data item. */
+ if(!ucol_equals(source, target) && sourceRulesLen != 0) {
log_err("Collator different from collator that was created from the same rules\n");
errorNo++;
}
@@ -4128,7 +2831,7 @@ static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCo
static void TestEquals(void) {
/* ucol_equals is not currently a public API. There is a chance that it will become
- * something like this, but currently it is only used by RuleBasedCollator::operator==
+ * something like this.
*/
/* test whether the two collators instantiated from the same locale are equal */
UErrorCode status = U_ZERO_ERROR;
@@ -4183,8 +2886,8 @@ static void TestEquals(void) {
if(!ucol_equals(source, source)) {
log_err("Same collator not equal\n");
}
- if(TestEqualsForCollator(locName, source, target)) {
- log_err("Errors for root\n", locName);
+ if(TestEqualsForCollator("root", source, target)) {
+ log_err("Errors for root\n");
}
ucol_close(source);
@@ -4399,83 +3102,6 @@ static void TestPinyinProblem(void) {
genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
}
-#define TST_UCOL_MAX_INPUT 0x220001
-#define topByte 0xFF000000;
-#define bottomByte 0xFF;
-#define fourBytes 0xFFFFFFFF;
-
-
-static void showImplicit(UChar32 i) {
- if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
- log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
- }
-}
-
-static void TestImplicitGeneration(void) {
- UErrorCode status = U_ZERO_ERROR;
- UChar32 last = 0;
- UChar32 current;
- UChar32 i = 0, j = 0;
- UChar32 roundtrip = 0;
- UChar32 lastBottom = 0;
- UChar32 currentBottom = 0;
- UChar32 lastTop = 0;
- UChar32 currentTop = 0;
-
- UCollator *coll = ucol_open("root", &status);
- if(U_FAILURE(status)) {
- log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
- return;
- }
-
- uprv_uca_getRawFromImplicit(0xE20303E7);
-
- for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
- current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
-
- /* check that it round-trips AND that all intervening ones are illegal*/
- roundtrip = uprv_uca_getRawFromImplicit(current);
- if (roundtrip != i) {
- log_err("No roundtrip %08X\n", i);
- }
- if (last != 0) {
- for (j = last + 1; j < current; ++j) {
- roundtrip = uprv_uca_getRawFromImplicit(j);
- /* raise an error if it *doesn't* find an error*/
- if (roundtrip != -1) {
- log_err("Fails to recognize illegal %08X\n", j);
- }
- }
- }
- /* now do other consistency checks*/
- lastBottom = last & bottomByte;
- currentBottom = current & bottomByte;
- lastTop = last & topByte;
- currentTop = current & topByte;
- (void)lastBottom; /* Suppress set but not used warnings. */
- (void)currentBottom;
-
- /* print out some values for spot-checking*/
- if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
- showImplicit(i-3);
- showImplicit(i-2);
- showImplicit(i-1);
- showImplicit(i);
- showImplicit(i+1);
- showImplicit(i+2);
- }
- last = current;
-
- if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
- log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
- }
- }
- showImplicit(TST_UCOL_MAX_INPUT-2);
- showImplicit(TST_UCOL_MAX_INPUT-1);
- showImplicit(TST_UCOL_MAX_INPUT);
- ucol_close(coll);
-}
-
/**
* Iterate through the given iterator, checking to see that all the strings
* in the expected array are present.
@@ -4602,8 +3228,8 @@ ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
&isAvailable, &ec);
if (assertSuccess("getFunctionalEquivalent", &ec)) {
assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
- assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
- isAvailable == TRUE);
+ assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
+ isAvailable == FALSE);
}
}
@@ -4955,9 +3581,20 @@ TestVI5913(void)
UCollator *coll =NULL;
uint8_t resColl[100], expColl[100];
int32_t rLen, tLen, ruleLen, sLen, kLen;
- UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/
+ UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/
UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
- UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/
+ /*
+ * Note: Just tailoring &z<ae^ does not work as expected:
+ * The UCA spec requires for discontiguous contractions that they
+ * extend an *existing match* by one combining mark at a time.
+ * Therefore, ae must be a contraction so that the builder finds
+ * discontiguous contractions for ae^, for example with an intervening underdot.
+ * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
+ */
+ UChar rule3[256]={
+ 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */
+ 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/
+ 0};
static const UChar tData[][20]={
{0x1EAC, 0},
{0x0041, 0x0323, 0x0302, 0},
@@ -5098,18 +3735,22 @@ TestVI5913(void)
coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
tLen = u_strlen(tailorData3[3]);
kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
+ log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
+ for(i = 0; i<kLen; i++) {
+ log_verbose(" %02X", expColl[i]);
+ }
for (j=4; j<6; j++) {
tLen = u_strlen(tailorData3[j]);
rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
- log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
+ log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
for(i = 0; i<rLen; i++) {
log_err(" %02X", resColl[i]);
}
}
- log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
+ log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
for(i = 0; i<rLen; i++) {
log_verbose(" %02X", resColl[i]);
}
@@ -5153,11 +3794,15 @@ TestTailor6179(void)
/*
* These values from FractionalUCA.txt will change,
* and need to be updated here.
+ * TODO: Make this not check for particular sort keys.
+ * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
*/
- static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
- static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
- static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
- static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
+ static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
+ static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
+ static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
+ static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
+
+ UParseError parseError;
/* Test [Last Primary ignorable] */
@@ -5191,10 +3836,12 @@ TestTailor6179(void)
/* Test [Last Secondary ignorable] */
log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
- ruleLen = u_strlen(rule1);
- coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
+ ruleLen = u_strlen(rule2);
+ coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
if (U_FAILURE(status)) {
log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
+ log_info(" offset=%d \"%s\" | \"%s\"\n",
+ parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
return;
}
tLen = u_strlen(tData2[0]);
@@ -5206,16 +3853,14 @@ TestTailor6179(void)
}
log_err("\n");
}
- if(!log_knownIssue("8982", "debug and fix")) { /* TODO: debug & fix, see ticket #8982 */
- tLen = u_strlen(tData2[1]);
- rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
- if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
- log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
- for(i = 0; i<rLen; i++) {
- log_err(" %02X", resColl[i]);
- }
- log_err("\n");
+ tLen = u_strlen(tData2[1]);
+ rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
+ if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
+ log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
+ for(i = 0; i<rLen; i++) {
+ log_err(" %02X", resColl[i]);
}
+ log_err("\n");
}
ucol_close(coll);
}
@@ -5582,6 +4227,10 @@ static void doTestOneTestCase(const OneTestCase testcases[],
myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
if(U_FAILURE(status)){
log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
+ log_info(" offset=%d \"%s\" | \"%s\"\n",
+ parse_error.offset,
+ aescstrdup(parse_error.preContext, -1),
+ aescstrdup(parse_error.postContext, -1));
return;
}
log_verbose("Testing the <<* syntax\n");
@@ -5627,13 +4276,13 @@ const static OneTestCase rangeTestcases[] = {
static int nRangeTestcases = LEN(rangeTestcases);
const static OneTestCase rangeTestcasesSupplemental[] = {
- { {0xfffe}, {0xffff}, UCOL_LESS }, /* U+FFFE < U+FFFF */
- { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFF < U+10000 */
+ { {0x4e00}, {0xfffb}, UCOL_LESS }, /* U+4E00 < U+FFFB */
+ { {0xfffb}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFB < U+10000 */
{ {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
- { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+FFFE < U+10001 */
+ { {0x4e00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+4E00 < U+10001 */
{ {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
{ {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
- { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+FFFE < U+10001 */
+ { {0x4e00}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+4E00 < U+10002 */
};
static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
@@ -5690,10 +4339,10 @@ static void TestSameStrengthListQuoted(void)
static void TestSameStrengthListSupplemental(void)
{
const char* strRules[] = {
- "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
- "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
- "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
- "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
+ "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
+ "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
+ "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
+ "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
};
doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
}
@@ -5741,7 +4390,8 @@ static void TestSameStrengthListRanges(void)
static void TestSameStrengthListSupplementalRanges(void)
{
const char* strRules[] = {
- "&\\ufffe<*\\uffff-\\U00010002",
+ /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
+ "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
};
doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
}
@@ -6041,6 +4691,7 @@ static void TestReorderingAPI(void)
int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
+ int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
UCollationResult collResult;
int32_t retrievedReorderCodesLength;
int32_t retrievedReorderCodes[10];
@@ -6118,6 +4769,22 @@ static void TestReorderingAPI(void)
return;
}
+ /* clear the reordering using [NONE] */
+ ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
+ if (U_FAILURE(status)) {
+ log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
+ return;
+ }
+
+ /* get the reordering again */
+ retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
+ if (retrievedReorderCodesLength != 0) {
+ log_err_status(status,
+ "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
+ retrievedReorderCodesLength);
+ return;
+ }
+
/* test for error condition on duplicate reorder codes */
ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
if (!U_FAILURE(status)) {
@@ -6272,17 +4939,22 @@ static void TestReorderingAPIWithRuleCreatedCollator(void)
ucol_close(myCollation);
}
-static int compareUScriptCodes(const void * a, const void * b)
-{
- return ( *(int32_t*)a - *(int32_t*)b );
+static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
+ int32_t i;
+ for (i = 0; i < length; ++i) {
+ if (expectedScript == scripts[i]) { return TRUE; }
+ }
+ return FALSE;
}
static void TestEquivalentReorderingScripts(void) {
UErrorCode status = U_ZERO_ERROR;
- int32_t equivalentScripts[50];
- int32_t equivalentScriptsLength;
- int loopIndex;
- int32_t equivalentScriptsResult[] = {
+ int32_t equivalentScripts[100];
+ int32_t length;
+ int i;
+ int32_t prevScript;
+ /* At least these scripts are expected to be equivalent. There may be more. */
+ static const int32_t expectedScripts[] = {
USCRIPT_BOPOMOFO,
USCRIPT_LISU,
USCRIPT_LYCIAN,
@@ -6311,46 +4983,49 @@ static void TestEquivalentReorderingScripts(void) {
USCRIPT_MEROITIC_HIEROGLYPHS
};
- qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
-
/* UScript.GOTHIC */
- equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
+ length = ucol_getEquivalentReorderCodes(
+ USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
if (U_FAILURE(status)) {
- log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
+ log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
return;
}
- /*
- fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
- fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
- for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
- fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
+ if (length < LEN(expectedScripts)) {
+ log_err("ERROR/Gothic: retrieved equivalent script length wrong: "
+ "expected at least %d, was = %d\n",
+ LEN(expectedScripts), length);
}
- */
- if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
- log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
- return;
+ prevScript = -1;
+ for (i = 0; i < length; ++i) {
+ int32_t script = equivalentScripts[i];
+ if (script <= prevScript) {
+ log_err("ERROR/Gothic: equivalent scripts out of order at index %d\n", i);
+ }
+ prevScript = script;
}
- for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
- if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
- log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
- return;
+ for (i = 0; i < LEN(expectedScripts); i++) {
+ if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
+ log_err("ERROR/Gothic: equivalent scripts do not contain %d\n",
+ expectedScripts[i]);
}
}
/* UScript.SHAVIAN */
- equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
+ length = ucol_getEquivalentReorderCodes(
+ USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
if (U_FAILURE(status)) {
- log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
+ log_err_status(status, "ERROR/Shavian: retrieving equivalent reorder codes: %s\n", myErrorName(status));
return;
}
- if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
- log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
- return;
+ if (length < LEN(expectedScripts)) {
+ log_err("ERROR/Shavian: retrieved equivalent script length wrong: "
+ "expected at least %d, was = %d\n",
+ LEN(expectedScripts), length);
}
- for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
- if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
- log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
- return;
+ for (i = 0; i < LEN(expectedScripts); i++) {
+ if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
+ log_err("ERROR/Shavian: equivalent scripts do not contain %d\n",
+ expectedScripts[i]);
}
}
}
@@ -6854,6 +5529,11 @@ static void TestImport(void)
}
virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
+ if(viruleslength == 0) {
+ log_data_err("missing vi tailoring rule string\n");
+ ucol_close(vicoll);
+ return;
+ }
escoll = ucol_open("es", &status);
esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
@@ -6953,6 +5633,11 @@ static void TestImportWithType(void)
return;
}
virules = ucol_getRules(vicoll, &viruleslength);
+ if(viruleslength == 0) {
+ log_data_err("missing vi tailoring rule string\n");
+ ucol_close(vicoll);
+ return;
+ }
/* decoll = ucol_open("de@collation=phonebook", &status); */
decoll = ucol_open("de-u-co-phonebk", &status);
if(U_FAILURE(status)){
@@ -7076,7 +5761,7 @@ static const LongUpperStrItem longUpperStrItems[] = {
{ NULL, 0 }
};
-enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
+enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
static void TestCaseLevelBufferOverflow(void)
@@ -7114,6 +5799,38 @@ static void TestCaseLevelBufferOverflow(void)
}
}
+/* Test for #10595 */
+static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
+#define KEY_PART_SIZE 16
+
+static void TestNextSortKeyPartJaIdentical(void)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ UCollator *coll;
+ uint8_t keyPart[KEY_PART_SIZE];
+ UCharIterator iter;
+ uint32_t state[2] = {0, 0};
+ int32_t keyPartLen;
+
+ coll = ucol_open("ja", &status);
+ ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
+ if (U_FAILURE(status)) {
+ log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
+ return;
+ }
+
+ uiter_setString(&iter, testJapaneseName, 5);
+ keyPartLen = KEY_PART_SIZE;
+ while (keyPartLen == KEY_PART_SIZE) {
+ keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
+ if (U_FAILURE(status)) {
+ log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
+ break;
+ }
+ }
+
+ ucol_close(coll);
+}
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
@@ -7131,6 +5848,7 @@ void addMiscCollTest(TestNode** root)
TEST(TestExtremeCompression);
TEST(TestSurrogates);
TEST(TestVariableTopSetting);
+ TEST(TestMaxVariable);
TEST(TestBocsuCoverage);
TEST(TestCyrillicTailoring);
TEST(TestCase);
@@ -7138,9 +5856,6 @@ void addMiscCollTest(TestNode** root)
TEST(BlackBirdTest);
TEST(FunkyATest);
TEST(BillFairmanTest);
- TEST(RamsRulesTest);
- TEST(IsTailoredTest);
- TEST(TestCollations);
TEST(TestChMove);
TEST(TestImplicitTailoring);
TEST(TestFCDProblem);
@@ -7149,8 +5864,6 @@ void addMiscCollTest(TestNode** root)
TEST(TestJ815);
/*TEST(TestJ831);*/ /* we changed lv locale */
TEST(TestBefore);
- TEST(TestRedundantRules);
- TEST(TestExpansionSyntax);
TEST(TestHangulTailoring);
TEST(TestUCARules);
TEST(TestIncrementalNormalize);
@@ -7172,7 +5885,6 @@ void addMiscCollTest(TestNode** root)
TEST(TestNumericCollation);
TEST(TestTibetanConformance);
TEST(TestPinyinProblem);
- TEST(TestImplicitGeneration);
TEST(TestSeparateTrees);
TEST(TestBeforePinyin);
TEST(TestBeforeTightening);
@@ -7224,6 +5936,7 @@ void addMiscCollTest(TestNode** root)
TEST(TestReorderWithNumericCollation);
TEST(TestCaseLevelBufferOverflow);
+ TEST(TestNextSortKeyPartJaIdentical);
}
#endif /* #if !UCONFIG_NO_COLLATION */
« no previous file with comments | « source/test/cintltst/cloctst.c ('k') | source/test/cintltst/cmsgtst.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698