| Index: icu46/source/test/cintltst/citertst.c
|
| ===================================================================
|
| --- icu46/source/test/cintltst/citertst.c (revision 0)
|
| +++ icu46/source/test/cintltst/citertst.c (revision 0)
|
| @@ -0,0 +1,2020 @@
|
| +/********************************************************************
|
| + * COPYRIGHT:
|
| + * Copyright (c) 1997-2010, International Business Machines Corporation and
|
| + * others. All Rights Reserved.
|
| + ********************************************************************/
|
| +/********************************************************************************
|
| +*
|
| +* File CITERTST.C
|
| +*
|
| +* Modification History:
|
| +* Date Name Description
|
| +* Madhu Katragadda Ported for C API
|
| +* 02/19/01 synwee Modified test case for new collation iterator
|
| +*********************************************************************************/
|
| +/*
|
| + * Collation Iterator tests.
|
| + * (Let me reiterate my position...)
|
| + */
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_COLLATION
|
| +
|
| +#include "unicode/ucol.h"
|
| +#include "unicode/uloc.h"
|
| +#include "unicode/uchar.h"
|
| +#include "unicode/ustring.h"
|
| +#include "unicode/putil.h"
|
| +#include "callcoll.h"
|
| +#include "cmemory.h"
|
| +#include "cintltst.h"
|
| +#include "citertst.h"
|
| +#include "ccolltst.h"
|
| +#include "filestrm.h"
|
| +#include "cstring.h"
|
| +#include "ucol_imp.h"
|
| +#include "ucol_tok.h"
|
| +#include "uparse.h"
|
| +#include <stdio.h>
|
| +
|
| +extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
|
| +
|
| +/**
|
| + * Registers the collation element iterator tests under the
|
| + * "tscoll/citertst/" path of the test tree rooted at root.
|
| + */
|
| +void addCollIterTest(TestNode** root)
|
| +{
|
| +    addTest(root, TestPrevious, "tscoll/citertst/TestPrevious");
|
| +    addTest(root, TestOffset, "tscoll/citertst/TestOffset");
|
| +    addTest(root, TestSetText, "tscoll/citertst/TestSetText");
|
| +    addTest(root, TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
|
| +    addTest(root, TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
|
| +    addTest(root, TestNormalizedUnicodeChar, "tscoll/citertst/TestNormalizedUnicodeChar");
|
| +    addTest(root, TestNormalization, "tscoll/citertst/TestNormalization");
|
| +    addTest(root, TestBug672, "tscoll/citertst/TestBug672");
|
| +    addTest(root, TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
|
| +    addTest(root, TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
|
| +    addTest(root, TestCEs, "tscoll/citertst/TestCEs");
|
| +    addTest(root, TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
|
| +    addTest(root, TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
|
| +    addTest(root, TestCEValidity, "tscoll/citertst/TestCEValidity");
|
| +    addTest(root, TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
|
| +}
|
| +
|
| +/* Locales iterated by TestBug672() and TestBug672Normalize(); both loops
|
| +   and their result tables are hard-coded for exactly three entries. */
|
| +
|
| +static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
|
| +
|
| +/**
|
| + * For each locale in LOCALES, positions an iterator over a fixed text with
|
| + * setOffset() / previous() / setOffset(+1) and records the offset reported
|
| + * after each step.  All locales must report the same three offsets.
|
| + * The collator and iterators are released on every exit path.
|
| + */
|
| +static void TestBug672() {
|
| +    UErrorCode status = U_ZERO_ERROR;
|
| +    UChar pattern[20];
|
| +    UChar text[50];
|
| +    int i;
|
| +    int result[3][3];
|
| +    UCollator *coll = NULL;
|
| +    UCollationElements *pitr = NULL;
|
| +    UCollationElements *titer = NULL;
|
| +
|
| +    u_uastrcpy(pattern, "resume");
|
| +    u_uastrcpy(text, "Time to resume updating my resume.");
|
| +
|
| +    for (i = 0; i < 3; ++ i) {
|
| +        coll = ucol_open(LOCALES[i], &status);
|
| +        pitr = ucol_openElements(coll, pattern, -1, &status);
|
| +        titer = ucol_openElements(coll, text, -1, &status);
|
| +        if (U_FAILURE(status)) {
|
| +            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
|
| +                           myErrorName(status));
|
| +            goto cleanup;
|
| +        }
|
| +
|
| +        log_verbose("locale tested %s\n", LOCALES[i]);
|
| +
|
| +        /* Run the pattern iterator to its end, then rewind it. */
|
| +        while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
|
| +               U_SUCCESS(status)) {
|
| +        }
|
| +        if (U_FAILURE(status)) {
|
| +            log_err("ERROR: reversing collation iterator :%s\n",
|
| +                    myErrorName(status));
|
| +            goto cleanup;
|
| +        }
|
| +        ucol_reset(pitr);
|
| +
|
| +        ucol_setOffset(titer, u_strlen(pattern), &status);
|
| +        if (U_FAILURE(status)) {
|
| +            log_err("ERROR: setting offset in collator :%s\n",
|
| +                    myErrorName(status));
|
| +            goto cleanup;
|
| +        }
|
| +        result[i][0] = ucol_getOffset(titer);
|
| +        log_verbose("Text iterator set to offset %d\n", result[i][0]);
|
| +
|
| +        /* Use previous() */
|
| +        ucol_previous(titer, &status);
|
| +        result[i][1] = ucol_getOffset(titer);
|
| +        log_verbose("Current offset %d after previous\n", result[i][1]);
|
| +
|
| +        /* Add one to index */
|
| +        log_verbose("Adding one to current offset...\n");
|
| +        ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
|
| +        if (U_FAILURE(status)) {
|
| +            log_err("ERROR: setting offset in collator :%s\n",
|
| +                    myErrorName(status));
|
| +            goto cleanup;
|
| +        }
|
| +        result[i][2] = ucol_getOffset(titer);
|
| +        log_verbose("Current offset in text = %d\n", result[i][2]);
|
| +
|
| +        ucol_closeElements(pitr);
|
| +        ucol_closeElements(titer);
|
| +        ucol_close(coll);
|
| +        pitr = NULL;
|
| +        titer = NULL;
|
| +        coll = NULL;
|
| +    }
|
| +
|
| +    /* Compare whole rows: memcmp takes a byte count, so use sizeof(row)
|
| +       rather than the number of ints in it. */
|
| +    if (uprv_memcmp(result[0], result[1], sizeof(result[0])) != 0 ||
|
| +        uprv_memcmp(result[1], result[2], sizeof(result[1])) != 0) {
|
| +        log_err("ERROR: Different locales have different offsets at the same character\n");
|
| +    }
|
| +
|
| +cleanup:
|
| +    /* After a fully successful run all three pointers are NULL here. */
|
| +    if (pitr != NULL) {
|
| +        ucol_closeElements(pitr);
|
| +    }
|
| +    if (titer != NULL) {
|
| +        ucol_closeElements(titer);
|
| +    }
|
| +    if (coll != NULL) {
|
| +        ucol_close(coll);
|
| +    }
|
| +}
|
| +
|
| +
|
| +
|
| +/**
|
| + * Same scenario as TestBug672(), but with UCOL_NORMALIZATION_MODE switched
|
| + * on for each collator.  (Original author's note: running this test with
|
| + * normalization enabled showed up a bug in the incremental normalization
|
| + * code.)  All locales in LOCALES must report the same three offsets.
|
| + * The collator and iterators are released on every exit path.
|
| + */
|
| +static void TestBug672Normalize() {
|
| +    UErrorCode status = U_ZERO_ERROR;
|
| +    UChar pattern[20];
|
| +    UChar text[50];
|
| +    int i;
|
| +    int result[3][3];
|
| +    UCollator *coll = NULL;
|
| +    UCollationElements *pitr = NULL;
|
| +    UCollationElements *titer = NULL;
|
| +
|
| +    u_uastrcpy(pattern, "resume");
|
| +    u_uastrcpy(text, "Time to resume updating my resume.");
|
| +
|
| +    for (i = 0; i < 3; ++ i) {
|
| +        coll = ucol_open(LOCALES[i], &status);
|
| +
|
| +        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
|
| +
|
| +        pitr = ucol_openElements(coll, pattern, -1, &status);
|
| +        titer = ucol_openElements(coll, text, -1, &status);
|
| +        if (U_FAILURE(status)) {
|
| +            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
|
| +                           myErrorName(status));
|
| +            goto cleanup;
|
| +        }
|
| +
|
| +        log_verbose("locale tested %s\n", LOCALES[i]);
|
| +
|
| +        /* Run the pattern iterator to its end, then rewind it. */
|
| +        while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
|
| +               U_SUCCESS(status)) {
|
| +        }
|
| +        if (U_FAILURE(status)) {
|
| +            log_err("ERROR: reversing collation iterator :%s\n",
|
| +                    myErrorName(status));
|
| +            goto cleanup;
|
| +        }
|
| +        ucol_reset(pitr);
|
| +
|
| +        ucol_setOffset(titer, u_strlen(pattern), &status);
|
| +        if (U_FAILURE(status)) {
|
| +            log_err("ERROR: setting offset in collator :%s\n",
|
| +                    myErrorName(status));
|
| +            goto cleanup;
|
| +        }
|
| +        result[i][0] = ucol_getOffset(titer);
|
| +        log_verbose("Text iterator set to offset %d\n", result[i][0]);
|
| +
|
| +        /* Use previous() */
|
| +        ucol_previous(titer, &status);
|
| +        result[i][1] = ucol_getOffset(titer);
|
| +        log_verbose("Current offset %d after previous\n", result[i][1]);
|
| +
|
| +        /* Add one to index */
|
| +        log_verbose("Adding one to current offset...\n");
|
| +        ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
|
| +        if (U_FAILURE(status)) {
|
| +            log_err("ERROR: setting offset in collator :%s\n",
|
| +                    myErrorName(status));
|
| +            goto cleanup;
|
| +        }
|
| +        result[i][2] = ucol_getOffset(titer);
|
| +        log_verbose("Current offset in text = %d\n", result[i][2]);
|
| +
|
| +        ucol_closeElements(pitr);
|
| +        ucol_closeElements(titer);
|
| +        ucol_close(coll);
|
| +        pitr = NULL;
|
| +        titer = NULL;
|
| +        coll = NULL;
|
| +    }
|
| +
|
| +    /* Compare whole rows: memcmp takes a byte count, so use sizeof(row)
|
| +       rather than the number of ints in it. */
|
| +    if (uprv_memcmp(result[0], result[1], sizeof(result[0])) != 0 ||
|
| +        uprv_memcmp(result[1], result[2], sizeof(result[1])) != 0) {
|
| +        log_err("ERROR: Different locales have different offsets at the same character\n");
|
| +    }
|
| +
|
| +cleanup:
|
| +    /* After a fully successful run all three pointers are NULL here. */
|
| +    if (pitr != NULL) {
|
| +        ucol_closeElements(pitr);
|
| +    }
|
| +    if (titer != NULL) {
|
| +        ucol_closeElements(titer);
|
| +    }
|
| +    if (coll != NULL) {
|
| +        ucol_close(coll);
|
| +    }
|
| +}
|
| +
|
| +
|
| +
|
| +
|
| +/**
|
| + * Runs backAndForth() with the en_US collator over the 16-bit code units
|
| + * U+0001..U+FFFF.  The units are handled in runs that end on a multiple of
|
| + * 0xFF; only units for which u_isdefined() is true go into the test string.
|
| + * Every run is iterated twice: once with an explicit length and once as a
|
| + * NUL-terminated string (length -1).
|
| + */
|
| +static void TestUnicodeChar()
|
| +{
|
| +    UChar chunk[0x100];
|
| +    UChar *out;
|
| +    UChar cp = 1;
|
| +    UErrorCode status = U_ZERO_ERROR;
|
| +    UCollationElements *elems;
|
| +    UCollator *collator = ucol_open("en_US", &status);
|
| +
|
| +    if (U_FAILURE(status)){
|
| +        log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
|
| +                       myErrorName(status));
|
| +        return;
|
| +    }
|
| +
|
| +    while (cp < 0xFFFE)
|
| +    {
|
| +        /* Collect the defined units up to and including the next multiple of 0xFF. */
|
| +        out = chunk;
|
| +        for (;;)
|
| +        {
|
| +            if (u_isdefined(cp)) {
|
| +                *out++ = cp;
|
| +            }
|
| +            if (cp % 0xFF == 0) {
|
| +                break;
|
| +            }
|
| +            cp++;
|
| +        }
|
| +
|
| +        /* Step past the run boundary without wrapping the 16-bit counter. */
|
| +        if (cp != 0xFFFF) {
|
| +            cp++;
|
| +        }
|
| +        *out = 0;
|
| +
|
| +        /* Pass 1: explicit length. */
|
| +        elems = ucol_openElements(collator, chunk, u_strlen(chunk), &status);
|
| +        if (U_FAILURE(status)) {
|
| +            log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                    myErrorName(status));
|
| +            ucol_close(collator);
|
| +            return;
|
| +        }
|
| +        log_verbose("codepoint testing %x\n", cp);
|
| +        backAndForth(elems);
|
| +        ucol_closeElements(elems);
|
| +
|
| +        /* Pass 2: NUL-terminated. */
|
| +        elems = ucol_openElements(collator, chunk, -1, &status);
|
| +        if (U_FAILURE(status)) {
|
| +            log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                    myErrorName(status));
|
| +            ucol_close(collator);
|
| +            return;
|
| +        }
|
| +        backAndForth(elems);
|
| +        ucol_closeElements(elems);
|
| +    }
|
| +
|
| +    ucol_close(collator);
|
| +}
|
| +
|
| +/**
|
| + * Runs backAndForth() with the th_TH collator (original author's note: thai
|
| + * should have normalization on) over the 16-bit code units U+0001..U+FFFF.
|
| + * The units are handled in runs that end on a multiple of 0xFF; only units
|
| + * for which u_isdefined() is true go into the test string.  Every run is
|
| + * iterated twice: with an explicit length and as a NUL-terminated string.
|
| + */
|
| +static void TestNormalizedUnicodeChar()
|
| +{
|
| +    UChar chunk[0x100];
|
| +    UChar *out;
|
| +    UChar cp = 1;
|
| +    UErrorCode status = U_ZERO_ERROR;
|
| +    UCollationElements *elems;
|
| +    UCollator *thai = ucol_open("th_TH", &status);
|
| +
|
| +    if (U_FAILURE(status)){
|
| +        log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
|
| +                       myErrorName(status));
|
| +        return;
|
| +    }
|
| +
|
| +    while (cp < 0xFFFE)
|
| +    {
|
| +        /* Collect the defined units up to and including the next multiple of 0xFF. */
|
| +        out = chunk;
|
| +        for (;;)
|
| +        {
|
| +            if (u_isdefined(cp)) {
|
| +                *out++ = cp;
|
| +            }
|
| +            if (cp % 0xFF == 0) {
|
| +                break;
|
| +            }
|
| +            cp++;
|
| +        }
|
| +
|
| +        /* Step past the run boundary without wrapping the 16-bit counter. */
|
| +        if (cp != 0xFFFF) {
|
| +            cp++;
|
| +        }
|
| +        *out = 0;
|
| +
|
| +        /* Pass 1: explicit length. */
|
| +        elems = ucol_openElements(thai, chunk, u_strlen(chunk), &status);
|
| +        if (U_FAILURE(status)) {
|
| +            log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                    myErrorName(status));
|
| +            ucol_close(thai);
|
| +            return;
|
| +        }
|
| +        backAndForth(elems);
|
| +        ucol_closeElements(elems);
|
| +
|
| +        /* Pass 2: NUL-terminated. */
|
| +        elems = ucol_openElements(thai, chunk, -1, &status);
|
| +        if (U_FAILURE(status)) {
|
| +            log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                    myErrorName(status));
|
| +            ucol_close(thai);
|
| +            return;
|
| +        }
|
| +        backAndForth(elems);
|
| +        ucol_closeElements(elems);
|
| +    }
|
| +
|
| +    ucol_close(thai);
|
| +}
|
| +
|
| +/**
|
| + * Test the incremental normalization.
|
| + *
|
| + * Builds a tailored collator (normalization on) from rules that involve
|
| + * combining marks, then runs backAndForth() over strings that contain those
|
| + * marks in various orders, both with an explicit length and NUL-terminated.
|
| + * The collator is closed on every exit path.
|
| + */
|
| +static void TestNormalization()
|
| +{
|
| +    UErrorCode status = U_ZERO_ERROR;
|
| +    const char *str =
|
| +        "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
|
| +    UCollator *coll;
|
| +    UChar rule[50];
|
| +    int rulelen = u_unescape(str, rule, 50);
|
| +    int count = 0;
|
| +    const char *testdata[] =
|
| +        {"\\u1ED9", "o\\u0323\\u0302",
|
| +         "\\u0300\\u0315", "\\u0315\\u0300",
|
| +         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
|
| +         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
|
| +         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
|
| +         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
|
| +         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
|
| +    int32_t srclen;
|
| +    UChar source[10];
|
| +    UCollationElements *iter;
|
| +
|
| +    coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
|
| +    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
|
| +    if (U_FAILURE(status)){
|
| +        log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
|
| +                       myErrorName(status));
|
| +        if (coll != NULL) {
|
| +            ucol_close(coll);
|
| +        }
|
| +        return;
|
| +    }
|
| +
|
| +    /* Explicit-length pass over the first two strings (kept from the
|
| +       original; the main loop below visits them again). */
|
| +    for (count = 0; count < 2; count ++) {
|
| +        srclen = u_unescape(testdata[count], source, 10);
|
| +        iter = ucol_openElements(coll, source, srclen, &status);
|
| +        if (U_FAILURE(status)){
|
| +            log_err("ERROR: in creation of collator element iterator\n %s\n",
|
| +                    myErrorName(status));
|
| +            ucol_close(coll);
|
| +            return;
|
| +        }
|
| +        backAndForth(iter);
|
| +        ucol_closeElements(iter);
|
| +    }
|
| +
|
| +    /* NOTE(review): testdata has 14 entries but only the first 12 are
|
| +       exercised here, as in the original - confirm whether the last two
|
| +       are skipped on purpose. */
|
| +    for (count = 0; count < 12; count ++) {
|
| +        srclen = u_unescape(testdata[count], source, 10);
|
| +
|
| +        /* explicit length */
|
| +        iter = ucol_openElements(coll, source, srclen, &status);
|
| +        if (U_FAILURE(status)){
|
| +            log_err("ERROR: in creation of collator element iterator\n %s\n",
|
| +                    myErrorName(status));
|
| +            ucol_close(coll);
|
| +            return;
|
| +        }
|
| +        backAndForth(iter);
|
| +        ucol_closeElements(iter);
|
| +
|
| +        /* NUL-terminated */
|
| +        iter = ucol_openElements(coll, source, -1, &status);
|
| +        if (U_FAILURE(status)){
|
| +            log_err("ERROR: in creation of collator element iterator\n %s\n",
|
| +                    myErrorName(status));
|
| +            ucol_close(coll);
|
| +            return;
|
| +        }
|
| +        backAndForth(iter);
|
| +        ucol_closeElements(iter);
|
| +    }
|
| +    ucol_close(coll);
|
| +}
|
| +
|
| +/**
|
| + * Test for CollationElementIterator.previous()
|
| + *
|
| + * @bug 4108758 - Make sure it works with contracting characters
|
| + *
|
| + * Runs backAndForth() over: plain English text (en_US), a contraction
|
| + * tailoring, an expansion tailoring, a combined tailoring, a Thai string
|
| + * (th_TH) and a Japanese string (ja_JP).  The scratch text lives in a
|
| + * fixed-size stack buffer, and each collator is closed on every exit path.
|
| + */
|
| +static void TestPrevious()
|
| +{
|
| +    UCollator *coll=NULL;
|
| +    UChar rule[50];
|
| +    UChar source[20];   /* longest text stored below is 9 units + NUL */
|
| +    UCollator *c1, *c2, *c3;
|
| +    UCollationElements *iter;
|
| +    UErrorCode status = U_ZERO_ERROR;
|
| +    UChar test1[50];
|
| +
|
| +    u_uastrcpy(test1, "What subset of all possible test cases?");
|
| +    coll = ucol_open("en_US", &status);
|
| +
|
| +    iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
|
| +    log_verbose("English locale testing back and forth\n");
|
| +    if(U_FAILURE(status)){
|
| +        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                       myErrorName(status));
|
| +        ucol_close(coll);
|
| +        return;
|
| +    }
|
| +    /* A basic test to see if it's working at all */
|
| +    backAndForth(iter);
|
| +    ucol_closeElements(iter);
|
| +    ucol_close(coll);
|
| +
|
| +    /* Test with a contracting character sequence */
|
| +    u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
|
| +    c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
|
| +
|
| +    log_verbose("Contraction rule testing back and forth with no normalization\n");
|
| +
|
| +    if (c1 == NULL || U_FAILURE(status))
|
| +    {
|
| +        log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
|
| +                myErrorName(status));
|
| +        if (c1 != NULL) {
|
| +            ucol_close(c1);
|
| +        }
|
| +        return;
|
| +    }
|
| +    u_uastrcpy(source, "abchdcba");
|
| +    iter=ucol_openElements(c1, source, u_strlen(source), &status);
|
| +    if(U_FAILURE(status)){
|
| +        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                myErrorName(status));
|
| +        ucol_close(c1);
|
| +        return;
|
| +    }
|
| +    backAndForth(iter);
|
| +    ucol_closeElements(iter);
|
| +    ucol_close(c1);
|
| +
|
| +    /* Test with an expanding character sequence */
|
| +    u_uastrcpy(rule, "&a < b < c/abd < d");
|
| +    c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
|
| +    log_verbose("Expansion rule testing back and forth with no normalization\n");
|
| +    if (c2 == NULL || U_FAILURE(status))
|
| +    {
|
| +        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
|
| +                myErrorName(status));
|
| +        if (c2 != NULL) {
|
| +            ucol_close(c2);
|
| +        }
|
| +        return;
|
| +    }
|
| +    u_uastrcpy(source, "abcd");
|
| +    iter=ucol_openElements(c2, source, u_strlen(source), &status);
|
| +    if(U_FAILURE(status)){
|
| +        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                myErrorName(status));
|
| +        ucol_close(c2);
|
| +        return;
|
| +    }
|
| +    backAndForth(iter);
|
| +    ucol_closeElements(iter);
|
| +    ucol_close(c2);
|
| +
|
| +    /* Now try both */
|
| +    u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
|
| +    c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status);
|
| +    log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
|
| +
|
| +    if (c3 == NULL || U_FAILURE(status))
|
| +    {
|
| +        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
|
| +                myErrorName(status));
|
| +        if (c3 != NULL) {
|
| +            ucol_close(c3);
|
| +        }
|
| +        return;
|
| +    }
|
| +    u_uastrcpy(source, "abcdbchdc");
|
| +    iter=ucol_openElements(c3, source, u_strlen(source), &status);
|
| +    if(U_FAILURE(status)){
|
| +        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                myErrorName(status));
|
| +        ucol_close(c3);
|
| +        return;
|
| +    }
|
| +    backAndForth(iter);
|
| +    ucol_closeElements(iter);
|
| +    ucol_close(c3);
|
| +
|
| +    /* Thai text followed by "abc" */
|
| +    source[0] = 0x0e41;
|
| +    source[1] = 0x0e02;
|
| +    source[2] = 0x0e41;
|
| +    source[3] = 0x0e02;
|
| +    source[4] = 0x0e27;
|
| +    source[5] = 0x61;
|
| +    source[6] = 0x62;
|
| +    source[7] = 0x63;
|
| +    source[8] = 0;
|
| +
|
| +    coll = ucol_open("th_TH", &status);
|
| +    log_verbose("Thai locale testing back and forth with normalization\n");
|
| +    iter=ucol_openElements(coll, source, u_strlen(source), &status);
|
| +    if(U_FAILURE(status)){
|
| +        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                myErrorName(status));
|
| +        if (coll != NULL) {
|
| +            ucol_close(coll);
|
| +        }
|
| +        return;
|
| +    }
|
| +    backAndForth(iter);
|
| +    ucol_closeElements(iter);
|
| +    ucol_close(coll);
|
| +
|
| +    /* prev test */
|
| +    source[0] = 0x0061;
|
| +    source[1] = 0x30CF;
|
| +    source[2] = 0x3099;
|
| +    source[3] = 0x30FC;
|
| +    source[4] = 0;
|
| +
|
| +    coll = ucol_open("ja_JP", &status);
|
| +    log_verbose("Japanese locale testing back and forth with normalization\n");
|
| +    iter=ucol_openElements(coll, source, u_strlen(source), &status);
|
| +    if(U_FAILURE(status)){
|
| +        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                myErrorName(status));
|
| +        if (coll != NULL) {
|
| +            ucol_close(coll);
|
| +        }
|
| +        return;
|
| +    }
|
| +    backAndForth(iter);
|
| +    ucol_closeElements(iter);
|
| +    ucol_close(coll);
|
| +}
|
| +
|
| +/**
|
| + * Test for getOffset() and setOffset()
|
| + *
|
| + * Checks the iteration boundaries after setOffset(0) and setOffset(length),
|
| + * the offset reported after a full forward pass, that setOffset(0) makes an
|
| + * iterator behave like a freshly opened one, and the offsets reported while
|
| + * stepping forwards and backwards over "a\u0300\u0316b" with normalization on.
|
| + * All iterators, the order array and the collator are released on every
|
| + * exit path.
|
| + */
|
| +static void TestOffset()
|
| +{
|
| +    UErrorCode status= U_ZERO_ERROR;
|
| +    UCollator *en_us=NULL;
|
| +    UCollationElements *iter, *pristine;
|
| +    int32_t offset;
|
| +    OrderAndOffset *orders;
|
| +    int32_t orderLength=0;
|
| +    int count = 0;
|
| +    UChar test1[50];
|
| +
|
| +    u_uastrcpy(test1, "What subset of all possible test cases?");
|
| +    en_us = ucol_open("en_US", &status);
|
| +    log_verbose("Testing getOffset and setOffset for collations\n");
|
| +    iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
|
| +    if(U_FAILURE(status)){
|
| +        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                       myErrorName(status));
|
| +        ucol_close(en_us);
|
| +        return;
|
| +    }
|
| +
|
| +    /* testing boundaries */
|
| +    ucol_setOffset(iter, 0, &status);
|
| +    if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
|
| +        log_err("Error: After setting offset to 0, we should be at the end "
|
| +                "of the backwards iteration\n");
|
| +    }
|
| +    ucol_setOffset(iter, u_strlen(test1), &status);
|
| +    if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
|
| +        log_err("Error: After setting offset to end of the string, we should "
|
| +                "be at the end of the forwards iteration\n");
|
| +    }
|
| +
|
| +    /* Run all the way through the iterator, then get the offset */
|
| +
|
| +    orders = getOrders(iter, &orderLength);
|
| +
|
| +    offset = ucol_getOffset(iter);
|
| +
|
| +    if (offset != u_strlen(test1))
|
| +    {
|
| +        log_err("offset at end != length %d vs %d\n", offset,
|
| +                u_strlen(test1) );
|
| +    }
|
| +
|
| +    /* Now set the offset back to the beginning and see if it works */
|
| +    pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
|
| +    if(U_FAILURE(status)){
|
| +        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                myErrorName(status));
|
| +        /* release what was acquired so far */
|
| +        ucol_closeElements(iter);
|
| +        free(orders);
|
| +        ucol_close(en_us);
|
| +        return;
|
| +    }
|
| +    status = U_ZERO_ERROR;
|
| +
|
| +    ucol_setOffset(iter, 0, &status);
|
| +    if (U_FAILURE(status))
|
| +    {
|
| +        log_err("setOffset failed. %s\n", myErrorName(status));
|
| +    }
|
| +    else
|
| +    {
|
| +        assertEqual(iter, pristine);
|
| +    }
|
| +
|
| +    ucol_closeElements(pristine);
|
| +    ucol_closeElements(iter);
|
| +    free(orders);
|
| +
|
| +    /* testing offsets in normalization buffer */
|
| +    test1[0] = 0x61;
|
| +    test1[1] = 0x300;
|
| +    test1[2] = 0x316;
|
| +    test1[3] = 0x62;
|
| +    test1[4] = 0;
|
| +    ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
|
| +    iter = ucol_openElements(en_us, test1, 4, &status);
|
| +    if(U_FAILURE(status)){
|
| +        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
| +                myErrorName(status));
|
| +        ucol_close(en_us);
|
| +        return;
|
| +    }
|
| +
|
| +    /* forward pass: expected offsets after each element are 1, 3, 3, 4 */
|
| +    count = 0;
|
| +    while (ucol_next(iter, &status) != UCOL_NULLORDER &&
|
| +           U_SUCCESS(status)) {
|
| +        switch (count) {
|
| +        case 0:
|
| +            if (ucol_getOffset(iter) != 1) {
|
| +                log_err("ERROR: Offset of iteration should be 1\n");
|
| +            }
|
| +            break;
|
| +        case 3:
|
| +            if (ucol_getOffset(iter) != 4) {
|
| +                log_err("ERROR: Offset of iteration should be 4\n");
|
| +            }
|
| +            break;
|
| +        default:
|
| +            if (ucol_getOffset(iter) != 3) {
|
| +                log_err("ERROR: Offset of iteration should be 3\n");
|
| +            }
|
| +        }
|
| +        count ++;
|
| +    }
|
| +
|
| +    /* backward pass: expected offsets after each element are 3, 3, 1, 0 */
|
| +    ucol_reset(iter);
|
| +    count = 0;
|
| +    while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
|
| +           U_SUCCESS(status)) {
|
| +        switch (count) {
|
| +        case 0:
|
| +        case 1:
|
| +            if (ucol_getOffset(iter) != 3) {
|
| +                log_err("ERROR: Offset of iteration should be 3\n");
|
| +            }
|
| +            break;
|
| +        case 2:
|
| +            if (ucol_getOffset(iter) != 1) {
|
| +                log_err("ERROR: Offset of iteration should be 1\n");
|
| +            }
|
| +            break;
|
| +        default:
|
| +            if (ucol_getOffset(iter) != 0) {
|
| +                log_err("ERROR: Offset of iteration should be 0\n");
|
| +            }
|
| +        }
|
| +        count ++;
|
| +    }
|
| +
|
| +    if(U_FAILURE(status)){
|
| +        log_err("ERROR: in iterating collation elements %s\n",
|
| +                myErrorName(status));
|
| +    }
|
| +
|
| +    ucol_closeElements(iter);
|
| +    ucol_close(en_us);
|
| +}
|
| +
|
| +/**
|
| + * Test for setText()
|
| + *
|
| + * Opens two iterators over different strings, advances the second one a few
|
| + * elements, re-targets it at the first string with ucol_setText() and checks
|
| + * that both now yield the same elements.  Finally sets a NULL text with a
|
| + * non-zero length and expects the iterator to return UCOL_NULLORDER.
|
| + * Both iterators and the collator are released on every exit path.
|
| + */
|
| +static void TestSetText()
|
| +{
|
| +    int32_t c,i;
|
| +    UErrorCode status = U_ZERO_ERROR;
|
| +    UCollator *en_us=NULL;
|
| +    UCollationElements *iter1, *iter2;
|
| +    UChar test1[50];
|
| +    UChar test2[50];
|
| +
|
| +    u_uastrcpy(test1, "What subset of all possible test cases?");
|
| +    u_uastrcpy(test2, "has the highest probability of detecting");
|
| +    en_us = ucol_open("en_US", &status);
|
| +    log_verbose("testing setText for Collation elements\n");
|
| +    iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
|
| +    if(U_FAILURE(status)){
|
| +        log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
|
| +                       myErrorName(status));
|
| +        ucol_close(en_us);
|
| +        return;
|
| +    }
|
| +    iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
|
| +    if(U_FAILURE(status)){
|
| +        log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
|
| +                myErrorName(status));
|
| +        /* iter1 was opened successfully above and must not be leaked */
|
| +        ucol_closeElements(iter1);
|
| +        ucol_close(en_us);
|
| +        return;
|
| +    }
|
| +
|
| +    /* Run through the second iterator just to exercise it */
|
| +    c = ucol_next(iter2, &status);
|
| +    i = 0;
|
| +
|
| +    while ( ++i < 10 && (c != UCOL_NULLORDER))
|
| +    {
|
| +        if (U_FAILURE(status))
|
| +        {
|
| +            log_err("iter2->next() returned an error. %s\n", myErrorName(status));
|
| +            ucol_closeElements(iter2);
|
| +            ucol_closeElements(iter1);
|
| +            ucol_close(en_us);
|
| +            return;
|
| +        }
|
| +
|
| +        c = ucol_next(iter2, &status);
|
| +    }
|
| +
|
| +    /* Now set it to point to the same string as the first iterator */
|
| +    ucol_setText(iter2, test1, u_strlen(test1), &status);
|
| +    if (U_FAILURE(status))
|
| +    {
|
| +        log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
|
| +    }
|
| +    else
|
| +    {
|
| +        assertEqual(iter1, iter2);
|
| +    }
|
| +
|
| +    /* Now set it to point to a null string with fake length*/
|
| +    ucol_setText(iter2, NULL, 2, &status);
|
| +    if (U_FAILURE(status))
|
| +    {
|
| +        log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
|
| +    }
|
| +    else
|
| +    {
|
| +        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
|
| +            log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
|
| +        }
|
| +    }
|
| +
|
| +    ucol_closeElements(iter2);
|
| +    ucol_closeElements(iter1);
|
| +    ucol_close(en_us);
|
| +}
|
| +
|
| +/** @bug 4108762
|
| + * Test for getMaxExpansion()
|
| + *
|
| + * With a tailoring that contains contractions and an expansion, checks for
|
| + * every 16-bit code unit that ucol_getMaxExpansion() is at least the number
|
| + * of elements actually produced, then checks exact values for a few code
|
| + * points, a supplementary code point and a jamo.  A second tailoring
|
| + * (&q < U+1165 / qqqq) checks the expansion of that jamo.
|
| + * Each failure message reports the value the check actually expects.
|
| + */
|
| +static void TestMaxExpansion()
|
| +{
|
| +    UErrorCode status = U_ZERO_ERROR;
|
| +    UCollator *coll;
|
| +    UChar ch = 0;
|
| +    UChar32 unassigned = 0xEFFFD;
|
| +    UChar supplementary[2];
|
| +    uint32_t stringOffset = 0;
|
| +    UBool isError = FALSE;
|
| +    uint32_t sorder = 0;
|
| +    UCollationElements *iter;
|
| +    uint32_t temporder = 0;
|
| +    UChar rule[256];
|
| +
|
| +    u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
|
| +    coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
|
| +                          UCOL_DEFAULT_STRENGTH,NULL, &status);
|
| +    if (U_FAILURE(status) || coll == NULL) {
|
| +        log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
|
| +        return;
|
| +    }
|
| +
|
| +    iter = ucol_openElements(coll, &ch, 1, &status);
|
| +    if (U_FAILURE(status)) {
|
| +        /* Without an iterator the unconditional ucol_getMaxExpansion()
|
| +           calls below would be handed a NULL pointer. */
|
| +        log_err("Couldn't open collation element iterator -> %s\n", u_errorName(status));
|
| +        ucol_close(coll);
|
| +        return;
|
| +    }
|
| +
|
| +    while (ch < 0xFFFF && U_SUCCESS(status)) {
|
| +        int count = 1;
|
| +        uint32_t order;
|
| +        int32_t size = 0;
|
| +
|
| +        ch ++;
|
| +
|
| +        ucol_setText(iter, &ch, 1, &status);
|
| +        order = ucol_previous(iter, &status);
|
| +
|
| +        /* thai management */
|
| +        if (order == 0)
|
| +            order = ucol_previous(iter, &status);
|
| +
|
| +        while (U_SUCCESS(status) &&
|
| +               ucol_previous(iter, &status) != UCOL_NULLORDER) {
|
| +            count ++;
|
| +        }
|
| +
|
| +        size = ucol_getMaxExpansion(iter, order);
|
| +        if (U_FAILURE(status) || size < count) {
|
| +            log_err("Failure at codepoint %d, maximum expansion count < %d\n",
|
| +                    ch, count);
|
| +        }
|
| +    }
|
| +
|
| +    /* testing for exact max expansion */
|
| +    ch = 0;
|
| +    while (ch < 0x61) {
|
| +        uint32_t order;
|
| +        int32_t size;
|
| +        ucol_setText(iter, &ch, 1, &status);
|
| +        order = ucol_previous(iter, &status);
|
| +        size = ucol_getMaxExpansion(iter, order);
|
| +        if (U_FAILURE(status) || size != 1) {
|
| +            log_err("Failure at codepoint %d, maximum expansion count != %d\n",
|
| +                    ch, 1);
|
| +        }
|
| +        ch ++;
|
| +    }
|
| +
|
| +    ch = 0x63;
|
| +    ucol_setText(iter, &ch, 1, &status);
|
| +    temporder = ucol_previous(iter, &status);
|
| +
|
| +    if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
|
| +        log_err("Failure at codepoint %d, maximum expansion count != %d\n",
|
| +                ch, 3);
|
| +    }
|
| +
|
| +    ch = 0x64;
|
| +    ucol_setText(iter, &ch, 1, &status);
|
| +    temporder = ucol_previous(iter, &status);
|
| +
|
| +    if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
|
| +        log_err("Failure at codepoint %d, maximum expansion count != %d\n",
|
| +                ch, 1);
|
| +    }
|
| +
|
| +    /* supplementary code point, stored as a surrogate pair */
|
| +    U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
|
| +    ucol_setText(iter, supplementary, 2, &status);
|
| +    sorder = ucol_previous(iter, &status);
|
| +
|
| +    if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
|
| +        log_err("Failure at codepoint %d, maximum expansion count != %d\n",
|
| +                unassigned, 2);
|
| +    }
|
| +
|
| +    /* testing jamo */
|
| +    ch = 0x1165;
|
| +
|
| +    ucol_setText(iter, &ch, 1, &status);
|
| +    temporder = ucol_previous(iter, &status);
|
| +    if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
|
| +        log_err("Failure at codepoint %d, maximum expansion count > %d\n",
|
| +                ch, 3);
|
| +    }
|
| +
|
| +    ucol_closeElements(iter);
|
| +    ucol_close(coll);
|
| +
|
| +    /* testing special jamo: the rule spelled out below is &q<\u1165/qqqq */
|
| +    rule[0] = 0x26;
|
| +    rule[1] = 0x71;
|
| +    rule[2] = 0x3c;
|
| +    rule[3] = 0x1165;
|
| +    rule[4] = 0x2f;
|
| +    rule[5] = 0x71;
|
| +    rule[6] = 0x71;
|
| +    rule[7] = 0x71;
|
| +    rule[8] = 0x71;
|
| +    rule[9] = 0;
|
| +
|
| +    coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
|
| +                          UCOL_DEFAULT_STRENGTH,NULL, &status);
|
| +    iter = ucol_openElements(coll, &ch, 1, &status);
|
| +
|
| +    temporder = ucol_previous(iter, &status);
|
| +    if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
|
| +        log_err("Failure at codepoint %d, maximum expansion count != %d\n",
|
| +                ch, 6);
|
| +    }
|
| +
|
| +    ucol_closeElements(iter);
|
| +    ucol_close(coll);
|
| +}
|
| +
|
| +
|
| +/**
|
| + * Steps two iterators forward in lock-step and logs an error at the first
|
| + * position where they return different collation elements.  Stops after the
|
| + * first mismatch or once the first iterator returns UCOL_NULLORDER.
|
| + * The status set by ucol_next() is not examined.
|
| + */
|
| +static void assertEqual(UCollationElements *i1, UCollationElements *i2)
|
| +{
|
| +    UErrorCode status = U_ZERO_ERROR;
|
| +    int32_t step;
|
| +
|
| +    for (step = 0; ; ++step)
|
| +    {
|
| +        int32_t left = ucol_next(i1, &status);
|
| +        int32_t right = ucol_next(i2, &status);
|
| +
|
| +        if (left != right)
|
| +        {
|
| +            log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", step, left, right);
|
| +            return;
|
| +        }
|
| +        if (left == UCOL_NULLORDER)
|
| +        {
|
| +            return;
|
| +        }
|
| +    }
|
| +}
|
| +
|
| +/**
|
| + * Testing iterators with extremely small buffers.
|
| + *
|
| + * Iterates (th_TH collator) over 500 alternating U+0300 / U+031A units and
|
| + * expects 500 collation elements whose upper half (indices 250..499) equals
|
| + * the element of U+031A and whose lower half equals the element of U+0300,
|
| + * using the two-unit string "\u0300\u031A" as the reference.
|
| + */
|
| +static void TestSmallBuffer()
|
| +{
|
| +    UErrorCode status = U_ZERO_ERROR;
|
| +    UCollator *coll;
|
| +    UCollationElements *testiter,
|
| +                       *iter;
|
| +    int32_t count;
|
| +    OrderAndOffset *testorders,
|
| +                   *orders;
|
| +
|
| +    UChar teststr[500];
|
| +    UChar str[] = {0x300, 0x31A, 0};
|
| +
|
| +    /*
|
| +    A long string of combining marks; per the original author's note the
|
| +    default writable buffer is of size 256, so this input exceeds it.
|
| +    */
|
| +    for (count = 0; count < 500; count ++) {
|
| +        teststr[count] = (count & 1) ? 0x31A : 0x300;
|
| +    }
|
| +
|
| +    coll = ucol_open("th_TH", &status);
|
| +    if (U_FAILURE(status) || !coll) {
|
| +        log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
|
| +        return;
|
| +    }
|
| +
|
| +    testiter = ucol_openElements(coll, teststr, 500, &status);
|
| +    iter = ucol_openElements(coll, str, 2, &status);
|
| +
|
| +    /* reference elements for the two marks */
|
| +    orders = getOrders(iter, &count);
|
| +    if (count != 2) {
|
| +        log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
|
| +    }
|
| +
|
| +    /*
|
| +    Expected result: the elements of 250 x U+0300 first, then those of
|
| +    250 x U+031A.
|
| +    */
|
| +    testorders = getOrders(testiter, &count);
|
| +
|
| +    if (count != 500) {
|
| +        log_err("Error decomposition does not give the right sized collation elements\n");
|
| +    }
|
| +
|
| +    /* Walk the result from the back: index >= 250 must match orders[1],
|
| +       lower indices must match orders[0]. */
|
| +    while (count != 0) {
|
| +        -- count;
|
| +        if (testorders[count].order !=
|
| +            (count >= 250 ? orders[1].order : orders[0].order)) {
|
| +            log_err("Error decomposition does not give the right collation element at %d count\n", count);
|
| +            break;
|
| +        }
|
| +    }
|
| +
|
| +    free(testorders);
|
| +    free(orders);
|
| +
|
| +    ucol_reset(testiter);
|
| +
|
| +    /* ensures closing of elements done properly to clear writable buffer */
|
| +    ucol_next(testiter, &status);
|
| +    ucol_next(testiter, &status);
|
| +    ucol_closeElements(testiter);
|
| +    ucol_closeElements(iter);
|
| +    ucol_close(coll);
|
| +}
|
| +
|
| +/**
|
| +* Snippet of code from genuca.
|
| +* Converts one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to
|
| +* its numeric value; any other character yields 0.
|
| +*/
|
| +static int32_t hex2num(char hex) {
|
| +    if('0' <= hex && hex <= '9') {
|
| +        return hex - '0';
|
| +    }
|
| +    if('a' <= hex && hex <= 'f') {
|
| +        return 10 + (hex - 'a');
|
| +    }
|
| +    if('A' <= hex && hex <= 'F') {
|
| +        return 10 + (hex - 'A');
|
| +    }
|
| +    return 0;
|
| +}
|
| +
|
| +/**
|
| +* Getting codepoints from a string
|
| +* @param str character string contain codepoints seperated by space and ended
|
| +* by a semicolon
|
| +* @param codepoints array for storage, assuming size > 5
|
| +* @return position at the end of the codepoint section
|
| +*/
|
| +static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
|
| + UErrorCode errorCode = U_ZERO_ERROR;
|
| + char *semi = uprv_strchr(str, ';');
|
| + char *pipe = uprv_strchr(str, '|');
|
| + char *s;
|
| + *codepoints = 0;
|
| + *contextCPs = 0;
|
| + if(semi == NULL) {
|
| + log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
|
| + return str;
|
| + }
|
| + if(pipe != NULL) {
|
| + int32_t contextLength;
|
| + *pipe = 0;
|
| + contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
|
| + *pipe = '|';
|
| + if(U_FAILURE(errorCode)) {
|
| + log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
|
| + return str;
|
| + }
|
| + /* prepend the precontext string to the codepoints */
|
| + u_memcpy(codepoints, contextCPs, contextLength);
|
| + codepoints += contextLength;
|
| + /* start of the code point string */
|
| + s = pipe + 1;
|
| + } else {
|
| + s = str;
|
| + }
|
| + u_parseString(s, codepoints, 99, NULL, &errorCode);
|
| + if(U_FAILURE(errorCode)) {
|
| + log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
|
| + return str;
|
| + }
|
| + return semi + 1;
|
| +}
|
| +
|
| +/**
|
| +* Sniplets of code from genuca
|
| +*/
|
| +static int32_t
|
| +readElement(char **from, char *to, char separator, UErrorCode *status)
|
| +{
|
| + if (U_SUCCESS(*status)) {
|
| + char buffer[1024];
|
| + int32_t i = 0;
|
| + while (**from != separator) {
|
| + if (**from != ' ') {
|
| + *(buffer+i++) = **from;
|
| + }
|
| + (*from)++;
|
| + }
|
| + (*from)++;
|
| + *(buffer + i) = 0;
|
| + strcpy(to, buffer);
|
| + return i/2;
|
| + }
|
| +
|
| + return 0;
|
| +}
|
| +
|
| +/**
|
| +* Sniplets of code from genuca
|
| +*/
|
| +static uint32_t
|
| +getSingleCEValue(char *primary, char *secondary, char *tertiary,
|
| + UErrorCode *status)
|
| +{
|
| + if (U_SUCCESS(*status)) {
|
| + uint32_t value = 0;
|
| + char primsave = '\0';
|
| + char secsave = '\0';
|
| + char tersave = '\0';
|
| + char *primend = primary+4;
|
| + char *secend = secondary+2;
|
| + char *terend = tertiary+2;
|
| + uint32_t primvalue;
|
| + uint32_t secvalue;
|
| + uint32_t tervalue;
|
| +
|
| + if (uprv_strlen(primary) > 4) {
|
| + primsave = *primend;
|
| + *primend = '\0';
|
| + }
|
| +
|
| + if (uprv_strlen(secondary) > 2) {
|
| + secsave = *secend;
|
| + *secend = '\0';
|
| + }
|
| +
|
| + if (uprv_strlen(tertiary) > 2) {
|
| + tersave = *terend;
|
| + *terend = '\0';
|
| + }
|
| +
|
| + primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
|
| + secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
|
| + tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
|
| + if(primvalue <= 0xFF) {
|
| + primvalue <<= 8;
|
| + }
|
| +
|
| + value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
|
| + | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
|
| + | (tervalue & UCOL_TERTIARYORDERMASK);
|
| +
|
| + if(primsave!='\0') {
|
| + *primend = primsave;
|
| + }
|
| + if(secsave!='\0') {
|
| + *secend = secsave;
|
| + }
|
| + if(tersave!='\0') {
|
| + *terend = tersave;
|
| + }
|
| + return value;
|
| + }
|
| + return 0;
|
| +}
|
| +
|
| +/**
|
| +* Getting collation elements generated from a string
|
| +* @param str character string contain collation elements contained in [] and
|
| +* seperated by space
|
| +* @param ce array for storage, assuming size > 20
|
| +* @param status error status
|
| +* @return position at the end of the codepoint section
|
| +*/
|
| +static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
|
| + char *pStartCP = uprv_strchr(str, '[');
|
| + int count = 0;
|
| + char *pEndCP;
|
| + char primary[100];
|
| + char secondary[100];
|
| + char tertiary[100];
|
| +
|
| + while (*pStartCP == '[') {
|
| + uint32_t primarycount = 0;
|
| + uint32_t secondarycount = 0;
|
| + uint32_t tertiarycount = 0;
|
| + uint32_t CEi = 1;
|
| + pEndCP = strchr(pStartCP, ']');
|
| + if(pEndCP == NULL) {
|
| + break;
|
| + }
|
| + pStartCP ++;
|
| +
|
| + primarycount = readElement(&pStartCP, primary, ',', status);
|
| + secondarycount = readElement(&pStartCP, secondary, ',', status);
|
| + tertiarycount = readElement(&pStartCP, tertiary, ']', status);
|
| +
|
| + /* I want to get the CEs entered right here, including continuation */
|
| + ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
|
| + if (U_FAILURE(*status)) {
|
| + break;
|
| + }
|
| +
|
| + while (2 * CEi < primarycount || CEi < secondarycount ||
|
| + CEi < tertiarycount) {
|
| + uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
|
| + if (2 * CEi < primarycount) {
|
| + value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
|
| + value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
|
| + }
|
| +
|
| + if (2 * CEi + 1 < primarycount) {
|
| + value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
|
| + value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
|
| + }
|
| +
|
| + if (CEi < secondarycount) {
|
| + value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
|
| + value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
|
| + }
|
| +
|
| + if (CEi < tertiarycount) {
|
| + value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
|
| + value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
|
| + }
|
| +
|
| + CEi ++;
|
| + ces[count ++] = value;
|
| + }
|
| +
|
| + pStartCP = pEndCP + 1;
|
| + }
|
| + ces[count] = 0;
|
| + return pStartCP;
|
| +}
|
| +
|
| +/**
|
| +* Getting the FractionalUCA.txt file stream
|
| +*/
|
| +static FileStream * getFractionalUCA(void)
|
| +{
|
| + char newPath[256];
|
| + char backupPath[256];
|
| + FileStream *result = NULL;
|
| +
|
| + /* Look inside ICU_DATA first */
|
| + uprv_strcpy(newPath, ctest_dataSrcDir());
|
| + uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
|
| + uprv_strcat(newPath, "FractionalUCA.txt");
|
| +
|
| + /* As a fallback, try to guess where the source data was located
|
| + * at the time ICU was built, and look there.
|
| + */
|
| +#if defined (U_TOPSRCDIR)
|
| + strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");
|
| +#else
|
| + {
|
| + UErrorCode errorCode = U_ZERO_ERROR;
|
| + strcpy(backupPath, loadTestData(&errorCode));
|
| + strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
|
| + }
|
| +#endif
|
| + strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
|
| +
|
| + result = T_FileStream_open(newPath, "rb");
|
| +
|
| + if (result == NULL) {
|
| + result = T_FileStream_open(backupPath, "rb");
|
| + if (result == NULL) {
|
| + log_err("Failed to open either %s or %s\n", newPath, backupPath);
|
| + }
|
| + }
|
| + return result;
|
| +}
|
| +
|
| +/**
|
| +* Testing the CEs returned by the iterator
|
| +*/
|
| +static void TestCEs() {
|
| + FileStream *file = NULL;
|
| + char line[2048];
|
| + char *str;
|
| + UChar codepoints[10];
|
| + uint32_t ces[20];
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + UCollator *coll = ucol_open("", &status);
|
| + uint32_t lineNo = 0;
|
| + UChar contextCPs[5];
|
| +
|
| + if (U_FAILURE(status)) {
|
| + log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
|
| + return;
|
| + }
|
| +
|
| + file = getFractionalUCA();
|
| +
|
| + if (file == NULL) {
|
| + log_err("*** unable to open input FractionalUCA.txt file ***\n");
|
| + return;
|
| + }
|
| +
|
| +
|
| + while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
|
| + int count = 0;
|
| + UCollationElements *iter;
|
| + int32_t preContextCeLen=0;
|
| + lineNo++;
|
| + /* skip this line if it is empty or a comment or is a return value
|
| + or start of some variable section */
|
| + if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
|
| + line[0] == 0x000D || line[0] == '[') {
|
| + continue;
|
| + }
|
| +
|
| + str = getCodePoints(line, codepoints, contextCPs);
|
| +
|
| + /* these are 'fake' codepoints in the fractional UCA, and are used just
|
| + * for positioning of indirect values. They should not go through this
|
| + * test.
|
| + */
|
| + if(*codepoints == 0xFDD0) {
|
| + continue;
|
| + }
|
| + if (*contextCPs != 0) {
|
| + iter = ucol_openElements(coll, contextCPs, -1, &status);
|
| + if (U_FAILURE(status)) {
|
| + log_err("Error in opening collation elements\n");
|
| + break;
|
| + }
|
| + while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
|
| + preContextCeLen++;
|
| + }
|
| + ucol_closeElements(iter);
|
| + }
|
| +
|
| + getCEs(str, ces+preContextCeLen, &status);
|
| + if (U_FAILURE(status)) {
|
| + log_err("Error in parsing collation elements in FractionalUCA.txt\n");
|
| + break;
|
| + }
|
| + iter = ucol_openElements(coll, codepoints, -1, &status);
|
| + if (U_FAILURE(status)) {
|
| + log_err("Error in opening collation elements\n");
|
| + break;
|
| + }
|
| + for (;;) {
|
| + uint32_t ce = (uint32_t)ucol_next(iter, &status);
|
| + if (ce == 0xFFFFFFFF) {
|
| + ce = 0;
|
| + }
|
| + /* we now unconditionally reorder Thai/Lao prevowels, so this
|
| + * test would fail if we don't skip here.
|
| + */
|
| + if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
|
| + continue;
|
| + }
|
| + if (ce != ces[count] || U_FAILURE(status)) {
|
| + log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
|
| + break;
|
| + }
|
| + if (ces[count] == 0) {
|
| + break;
|
| + }
|
| + count ++;
|
| + }
|
| + ucol_closeElements(iter);
|
| + }
|
| +
|
| + T_FileStream_close(file);
|
| + ucol_close(coll);
|
| +}
|
| +
|
| +/**
|
| +* Testing the discontigous contractions
|
| +*/
|
| +static void TestDiscontiguos() {
|
| + const char *rulestr =
|
| + "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
|
| + UChar rule[50];
|
| + int rulelen = u_unescape(rulestr, rule, 50);
|
| + const char *src[] = {
|
| + "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
|
| + /* base character blocked */
|
| + "XD\\u0300", "XD\\u0300\\u0315",
|
| + /* non blocking combining character */
|
| + "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
|
| + /* blocking combining character */
|
| + "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
|
| + /* contraction prefix */
|
| + "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
|
| + "X\\u0300\\u031A\\u0315",
|
| + /* ends not with a contraction character */
|
| + "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
|
| + "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
|
| + };
|
| + const char *tgt[] = {
|
| + /* non blocking combining character */
|
| + "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
|
| + /* base character blocked */
|
| + "X D \\u0300", "X D \\u0300\\u0315",
|
| + /* non blocking combining character */
|
| + "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
|
| + /* blocking combining character */
|
| + "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
|
| + /* contraction prefix */
|
| + "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
|
| + "X\\u0300 \\u031A \\u0315",
|
| + /* ends not with a contraction character */
|
| + "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
|
| + "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
|
| + };
|
| + int size = 20;
|
| + UCollator *coll;
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + int count = 0;
|
| + UCollationElements *iter;
|
| + UCollationElements *resultiter;
|
| +
|
| + coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
|
| + iter = ucol_openElements(coll, rule, 1, &status);
|
| + resultiter = ucol_openElements(coll, rule, 1, &status);
|
| +
|
| + if (U_FAILURE(status)) {
|
| + log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
|
| + return;
|
| + }
|
| +
|
| + while (count < size) {
|
| + UChar str[20];
|
| + UChar tstr[20];
|
| + int strLen = u_unescape(src[count], str, 20);
|
| + UChar *s;
|
| +
|
| + ucol_setText(iter, str, strLen, &status);
|
| + if (U_FAILURE(status)) {
|
| + log_err("Error opening collation iterator\n");
|
| + return;
|
| + }
|
| +
|
| + u_unescape(tgt[count], tstr, 20);
|
| + s = tstr;
|
| +
|
| + log_verbose("count %d\n", count);
|
| +
|
| + for (;;) {
|
| + uint32_t ce;
|
| + UChar *e = u_strchr(s, 0x20);
|
| + if (e == 0) {
|
| + e = u_strchr(s, 0);
|
| + }
|
| + ucol_setText(resultiter, s, (int32_t)(e - s), &status);
|
| + ce = ucol_next(resultiter, &status);
|
| + if (U_FAILURE(status)) {
|
| + log_err("Error manipulating collation iterator\n");
|
| + return;
|
| + }
|
| + while (ce != UCOL_NULLORDER) {
|
| + if (ce != (uint32_t)ucol_next(iter, &status) ||
|
| + U_FAILURE(status)) {
|
| + log_err("Discontiguos contraction test mismatch\n");
|
| + return;
|
| + }
|
| + ce = ucol_next(resultiter, &status);
|
| + if (U_FAILURE(status)) {
|
| + log_err("Error getting next collation element\n");
|
| + return;
|
| + }
|
| + }
|
| + s = e + 1;
|
| + if (*e == 0) {
|
| + break;
|
| + }
|
| + }
|
| + ucol_reset(iter);
|
| + backAndForth(iter);
|
| + count ++;
|
| + }
|
| + ucol_closeElements(resultiter);
|
| + ucol_closeElements(iter);
|
| + ucol_close(coll);
|
| +}
|
| +
|
| +static void TestCEBufferOverflow()
|
| +{
|
| + UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + UChar rule[10];
|
| + UCollator *coll;
|
| + UCollationElements *iter;
|
| +
|
| + u_uastrcpy(rule, "&z < AB");
|
| + coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
|
| + if (U_FAILURE(status)) {
|
| + log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
|
| + return;
|
| + }
|
| +
|
| + /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
|
| + test. this will cause an overflow in getPrev */
|
| + str[0] = 0x0041; /* 'A' */
|
| + /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
|
| + uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
|
| + str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */
|
| + iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
|
| + &status);
|
| + if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
|
| + status == U_BUFFER_OVERFLOW_ERROR) {
|
| + log_err("CE buffer should not overflow with long string of trail surrogates\n");
|
| + }
|
| + ucol_closeElements(iter);
|
| + ucol_close(coll);
|
| +}
|
| +
|
| +/**
|
| +* Checking collation element validity.
|
| +*/
|
| +#define MAX_CODEPOINTS_TO_SHOW 10
|
| +static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
|
| + int i, lengthToUse = length;
|
| + if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
|
| + lengthToUse = MAX_CODEPOINTS_TO_SHOW;
|
| + }
|
| + for (i = 0; i < lengthToUse; ++i) {
|
| + int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
|
| + if (bytesWritten <= 0) {
|
| + break;
|
| + }
|
| + codepointText += bytesWritten;
|
| + }
|
| + if (i < length) {
|
| + sprintf(codepointText, " ...");
|
| + }
|
| +}
|
| +
|
| +static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
|
| + int length)
|
| +{
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + UCollationElements *iter = ucol_openElements(coll, codepoints, length,
|
| + &status);
|
| + UBool result = FALSE;
|
| + UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
|
| + const char * collLocale;
|
| +
|
| + if (U_FAILURE(status)) {
|
| + log_err("Error creating iterator for testing validity\n");
|
| + return FALSE;
|
| + }
|
| + collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
|
| + if (U_FAILURE(status) || collLocale==NULL) {
|
| + status = U_ZERO_ERROR;
|
| + collLocale = "?";
|
| + }
|
| +
|
| + for (;;) {
|
| + uint32_t ce = ucol_next(iter, &status);
|
| + uint32_t primary, p1, p2, secondary, tertiary;
|
| + if (ce == UCOL_NULLORDER) {
|
| + result = TRUE;
|
| + break;
|
| + }
|
| + if (ce == 0) {
|
| + continue;
|
| + }
|
| + if (ce == 0x02000202) {
|
| + /* special CE for merge-sort character */
|
| + if (*codepoints == 0xFFFE /* && length == 1 */) {
|
| + /*
|
| + * Note: We should check for length==1 but the token parser appears
|
| + * to give us trailing NUL characters.
|
| + * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
|
| + * rather than the internal collation rule parser
|
| + */
|
| + continue;
|
| + } else {
|
| + log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
|
| + (int)*codepoints, (int)length);
|
| + break;
|
| + }
|
| + }
|
| + primary = UCOL_PRIMARYORDER(ce);
|
| + p1 = primary >> 8;
|
| + p2 = primary & 0xFF;
|
| + secondary = UCOL_SECONDARYORDER(ce);
|
| + tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
|
| +
|
| + if (!isContinuation(ce)) {
|
| + if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
|
| + log_err("Empty CE %08lX except for case bits\n", (long)ce);
|
| + break;
|
| + }
|
| + if (p1 == 0) {
|
| + if (p2 != 0) {
|
| + log_err("Primary 00 xx in %08lX\n", (long)ce);
|
| + break;
|
| + }
|
| + primaryDone = TRUE;
|
| + } else {
|
| + if (p1 <= 2 || p1 >= 0xF0) {
|
| + /* Primary first bytes F0..FF are specials. */
|
| + log_err("Primary first byte of %08lX out of range\n", (long)ce);
|
| + break;
|
| + }
|
| + if (p2 == 0) {
|
| + primaryDone = TRUE;
|
| + } else {
|
| + if (p2 <= 3 || p2 >= 0xFF) {
|
| + /* Primary second bytes 03 and FF are sort key compression terminators. */
|
| + log_err("Primary second byte of %08lX out of range\n", (long)ce);
|
| + break;
|
| + }
|
| + primaryDone = FALSE;
|
| + }
|
| + }
|
| + if (secondary == 0) {
|
| + if (primary != 0) {
|
| + log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
|
| + break;
|
| + }
|
| + secondaryDone = TRUE;
|
| + } else {
|
| + if (secondary <= 2 ||
|
| + (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
|
| + ) {
|
| + /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
|
| + log_err("Secondary byte of %08lX out of range\n", (long)ce);
|
| + break;
|
| + }
|
| + secondaryDone = FALSE;
|
| + }
|
| + if (tertiary == 0) {
|
| + /* We know that ce != 0. */
|
| + log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
|
| + break;
|
| + }
|
| + if (tertiary <= 2) {
|
| + log_err("Tertiary byte of %08lX out of range\n", (long)ce);
|
| + break;
|
| + }
|
| + tertiaryDone = FALSE;
|
| + } else {
|
| + if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
|
| + log_err("Empty continuation %08lX\n", (long)ce);
|
| + break;
|
| + }
|
| + if (primaryDone && primary != 0) {
|
| + log_err("Primary was done but continues in %08lX\n", (long)ce);
|
| + break;
|
| + }
|
| + if (p1 == 0) {
|
| + if (p2 != 0) {
|
| + log_err("Primary 00 xx in %08lX\n", (long)ce);
|
| + break;
|
| + }
|
| + primaryDone = TRUE;
|
| + } else {
|
| + if (p1 <= 2) {
|
| + log_err("Primary first byte of %08lX out of range\n", (long)ce);
|
| + break;
|
| + }
|
| + if (p2 == 0) {
|
| + primaryDone = TRUE;
|
| + } else {
|
| + if (p2 <= 3) {
|
| + log_err("Primary second byte of %08lX out of range\n", (long)ce);
|
| + break;
|
| + }
|
| + }
|
| + }
|
| + if (secondaryDone && secondary != 0) {
|
| + log_err("Secondary was done but continues in %08lX\n", (long)ce);
|
| + break;
|
| + }
|
| + if (secondary == 0) {
|
| + secondaryDone = TRUE;
|
| + } else {
|
| + if (secondary <= 2) {
|
| + log_err("Secondary byte of %08lX out of range\n", (long)ce);
|
| + break;
|
| + }
|
| + }
|
| + if (tertiaryDone && tertiary != 0) {
|
| + log_err("Tertiary was done but continues in %08lX\n", (long)ce);
|
| + break;
|
| + }
|
| + if (tertiary == 0) {
|
| + tertiaryDone = TRUE;
|
| + } else if (tertiary <= 2) {
|
| + log_err("Tertiary byte of %08lX out of range\n", (long)ce);
|
| + break;
|
| + }
|
| + }
|
| + }
|
| + if (!result) {
|
| + char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
|
| + showCodepoints(codepoints, length, codepointText);
|
| + log_err("Locale: %s Code point string: %s\n", collLocale, codepointText);
|
| + }
|
| + ucol_closeElements(iter);
|
| + return result;
|
| +}
|
| +
|
| +static void TestCEValidity()
|
| +{
|
| + /* testing UCA collation elements */
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + /* en_US has no tailorings */
|
| + UCollator *coll = ucol_open("root", &status);
|
| + /* tailored locales */
|
| + char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
|
| + const char *loc;
|
| + FileStream *file = NULL;
|
| + char line[2048];
|
| + UChar codepoints[11];
|
| + int count = 0;
|
| + int maxCount = 0;
|
| + UChar contextCPs[3];
|
| + UChar32 c;
|
| + UParseError parseError;
|
| + if (U_FAILURE(status)) {
|
| + log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
|
| + return;
|
| + }
|
| + log_verbose("Testing UCA elements\n");
|
| + file = getFractionalUCA();
|
| + if (file == NULL) {
|
| + log_err("Fractional UCA data can not be opened\n");
|
| + return;
|
| + }
|
| +
|
| + while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
|
| + if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
|
| + line[0] == 0x000D || line[0] == '[') {
|
| + continue;
|
| + }
|
| +
|
| + getCodePoints(line, codepoints, contextCPs);
|
| + checkCEValidity(coll, codepoints, u_strlen(codepoints));
|
| + }
|
| +
|
| + log_verbose("Testing UCA elements for the whole range of unicode characters\n");
|
| + for (c = 0; c <= 0xffff; ++c) {
|
| + if (u_isdefined(c)) {
|
| + codepoints[0] = (UChar)c;
|
| + checkCEValidity(coll, codepoints, 1);
|
| + }
|
| + }
|
| + for (; c <= 0x10ffff; ++c) {
|
| + if (u_isdefined(c)) {
|
| + int32_t i = 0;
|
| + U16_APPEND_UNSAFE(codepoints, i, c);
|
| + checkCEValidity(coll, codepoints, i);
|
| + }
|
| + }
|
| +
|
| + ucol_close(coll);
|
| +
|
| + /* testing tailored collation elements */
|
| + log_verbose("Testing tailored elements\n");
|
| + if(getTestOption(QUICK_OPTION)) {
|
| + maxCount = sizeof(locale)/sizeof(locale[0]);
|
| + } else {
|
| + maxCount = uloc_countAvailable();
|
| + }
|
| + while (count < maxCount) {
|
| + const UChar *rules = NULL,
|
| + *current = NULL;
|
| + UChar *rulesCopy = NULL;
|
| + int32_t ruleLen = 0;
|
| +
|
| + uint32_t chOffset = 0;
|
| + uint32_t chLen = 0;
|
| + uint32_t exOffset = 0;
|
| + uint32_t exLen = 0;
|
| + uint32_t prefixOffset = 0;
|
| + uint32_t prefixLen = 0;
|
| + UBool startOfRules = TRUE;
|
| + UColOptionSet opts;
|
| +
|
| + UColTokenParser src;
|
| + uint32_t strength = 0;
|
| + uint16_t specs = 0;
|
| + if(getTestOption(QUICK_OPTION)) {
|
| + loc = locale[count];
|
| + } else {
|
| + loc = uloc_getAvailable(count);
|
| + if(!hasCollationElements(loc)) {
|
| + count++;
|
| + continue;
|
| + }
|
| + }
|
| +
|
| + uprv_memset(&src, 0, sizeof(UColTokenParser));
|
| +
|
| + log_verbose("Testing CEs for %s\n", loc);
|
| +
|
| + coll = ucol_open(loc, &status);
|
| + if (U_FAILURE(status)) {
|
| + log_err("%s collator creation failed\n", loc);
|
| + return;
|
| + }
|
| +
|
| + src.opts = &opts;
|
| + rules = ucol_getRules(coll, &ruleLen);
|
| +
|
| + if (ruleLen > 0) {
|
| + rulesCopy = (UChar *)uprv_malloc((ruleLen +
|
| + UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
|
| + uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
|
| + src.current = src.source = rulesCopy;
|
| + src.end = rulesCopy + ruleLen;
|
| + src.extraCurrent = src.end;
|
| + src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
| +
|
| + /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
|
| + the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
|
| + while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
|
| + strength = src.parsedToken.strength;
|
| + chOffset = src.parsedToken.charsOffset;
|
| + chLen = src.parsedToken.charsLen;
|
| + exOffset = src.parsedToken.extensionOffset;
|
| + exLen = src.parsedToken.extensionLen;
|
| + prefixOffset = src.parsedToken.prefixOffset;
|
| + prefixLen = src.parsedToken.prefixLen;
|
| + specs = src.parsedToken.flags;
|
| +
|
| + startOfRules = FALSE;
|
| + uprv_memcpy(codepoints, src.source + chOffset,
|
| + chLen * sizeof(UChar));
|
| + codepoints[chLen] = 0;
|
| + checkCEValidity(coll, codepoints, chLen);
|
| + }
|
| + uprv_free(src.source);
|
| + }
|
| +
|
| + ucol_close(coll);
|
| + count ++;
|
| + }
|
| + T_FileStream_close(file);
|
| +}
|
| +
|
| +static void printSortKeyError(const UChar *codepoints, int length,
|
| + uint8_t *sortkey, int sklen)
|
| +{
|
| + int count = 0;
|
| + log_err("Sortkey not valid for ");
|
| + while (length > 0) {
|
| + log_err("0x%04x ", *codepoints);
|
| + length --;
|
| + codepoints ++;
|
| + }
|
| + log_err("\nSortkey : ");
|
| + while (count < sklen) {
|
| + log_err("0x%02x ", sortkey[count]);
|
| + count ++;
|
| + }
|
| + log_err("\n");
|
| +}
|
| +
|
| +/**
|
| +* Checking sort key validity for all levels
|
| +*/
|
| +static UBool checkSortKeyValidity(UCollator *coll,
|
| + const UChar *codepoints,
|
| + int length)
|
| +{
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
|
| + UCOL_TERTIARY, UCOL_QUATERNARY,
|
| + UCOL_IDENTICAL};
|
| + int strengthlen = 5;
|
| + int strengthIndex = 0;
|
| + int caselevel = 0;
|
| +
|
| + while (caselevel < 1) {
|
| + if (caselevel == 0) {
|
| + ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
|
| + }
|
| + else {
|
| + ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
|
| + }
|
| +
|
| + while (strengthIndex < strengthlen) {
|
| + int count01 = 0;
|
| + uint32_t count = 0;
|
| + uint8_t sortkey[128];
|
| + uint32_t sklen;
|
| +
|
| + ucol_setStrength(coll, strength[strengthIndex]);
|
| + sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
|
| + while (sortkey[count] != 0) {
|
| + if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
|
| + printSortKeyError(codepoints, length, sortkey, sklen);
|
| + return FALSE;
|
| + }
|
| + if (sortkey[count] == 1) {
|
| + count01 ++;
|
| + }
|
| + count ++;
|
| + }
|
| +
|
| + if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
|
| + printSortKeyError(codepoints, length, sortkey, sklen);
|
| + return FALSE;
|
| + }
|
| + strengthIndex ++;
|
| + }
|
| + caselevel ++;
|
| + }
|
| + return TRUE;
|
| +}
|
| +
|
| +static void TestSortKeyValidity(void)
|
| +{
|
| + /* testing UCA collation elements */
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + /* en_US has no tailorings */
|
| + UCollator *coll = ucol_open("en_US", &status);
|
| + /* tailored locales */
|
| + char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
|
| + FileStream *file = NULL;
|
| + char line[2048];
|
| + UChar codepoints[10];
|
| + int count = 0;
|
| + UChar contextCPs[5];
|
| + UParseError parseError;
|
| + if (U_FAILURE(status)) {
|
| + log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
|
| + return;
|
| + }
|
| + log_verbose("Testing UCA elements\n");
|
| + file = getFractionalUCA();
|
| + if (file == NULL) {
|
| + log_err("Fractional UCA data can not be opened\n");
|
| + return;
|
| + }
|
| +
|
| + while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
|
| + if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
|
| + line[0] == 0x000D || line[0] == '[') {
|
| + continue;
|
| + }
|
| +
|
| + getCodePoints(line, codepoints, contextCPs);
|
| + if(codepoints[0] == 0xFFFE) {
|
| + /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
|
| + continue;
|
| + }
|
| + checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
|
| + }
|
| +
|
| + log_verbose("Testing UCA elements for the whole range of unicode characters\n");
|
| + codepoints[0] = 0;
|
| +
|
| + while (codepoints[0] < 0xFFFF) {
|
| + if (u_isdefined((UChar32)codepoints[0])) {
|
| + checkSortKeyValidity(coll, codepoints, 1);
|
| + }
|
| + codepoints[0] ++;
|
| + }
|
| +
|
| + ucol_close(coll);
|
| +
|
| + /* testing tailored collation elements */
|
| + log_verbose("Testing tailored elements\n");
|
| + while (count < 5) {
|
| + const UChar *rules = NULL,
|
| + *current = NULL;
|
| + UChar *rulesCopy = NULL;
|
| + int32_t ruleLen = 0;
|
| +
|
| + uint32_t chOffset = 0;
|
| + uint32_t chLen = 0;
|
| + uint32_t exOffset = 0;
|
| + uint32_t exLen = 0;
|
| + uint32_t prefixOffset = 0;
|
| + uint32_t prefixLen = 0;
|
| + UBool startOfRules = TRUE;
|
| + UColOptionSet opts;
|
| +
|
| + UColTokenParser src;
|
| + uint32_t strength = 0;
|
| + uint16_t specs = 0;
|
| +
|
| + uprv_memset(&src, 0, sizeof(UColTokenParser));
|
| +
|
| + coll = ucol_open(locale[count], &status);
|
| + if (U_FAILURE(status)) {
|
| + log_err("%s collator creation failed\n", locale[count]);
|
| + return;
|
| + }
|
| +
|
| + src.opts = &opts;
|
| + rules = ucol_getRules(coll, &ruleLen);
|
| +
|
| + if (ruleLen > 0) {
|
| + rulesCopy = (UChar *)uprv_malloc((ruleLen +
|
| + UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
|
| + uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
|
| + src.current = src.source = rulesCopy;
|
| + src.end = rulesCopy + ruleLen;
|
| + src.extraCurrent = src.end;
|
| + src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
| +
|
| + /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
|
| + the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
|
| + while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
|
| + strength = src.parsedToken.strength;
|
| + chOffset = src.parsedToken.charsOffset;
|
| + chLen = src.parsedToken.charsLen;
|
| + exOffset = src.parsedToken.extensionOffset;
|
| + exLen = src.parsedToken.extensionLen;
|
| + prefixOffset = src.parsedToken.prefixOffset;
|
| + prefixLen = src.parsedToken.prefixLen;
|
| + specs = src.parsedToken.flags;
|
| +
|
| + startOfRules = FALSE;
|
| + uprv_memcpy(codepoints, src.source + chOffset,
|
| + chLen * sizeof(UChar));
|
| + codepoints[chLen] = 0;
|
| + if(codepoints[0] == 0xFFFE) {
|
| + /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
|
| + continue;
|
| + }
|
| + checkSortKeyValidity(coll, codepoints, chLen);
|
| + }
|
| + uprv_free(src.source);
|
| + }
|
| +
|
| + ucol_close(coll);
|
| + count ++;
|
| + }
|
| + T_FileStream_close(file);
|
| +}
|
| +
|
| +#endif /* #if !UCONFIG_NO_COLLATION */
|
|
|
| Property changes on: icu46/source/test/cintltst/citertst.c
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|