source/test/intltest/rbbitst.cpp - Issue 845603002: Update ICU to 54.1 step 1

Side by Side Diff: source/test/intltest/rbbitst.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unusued directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /********************************************************************	1 /********************************************************************

2 * COPYRIGHT:	2 * COPYRIGHT:

3 * Copyright (c) 1999-2013, International Business Machines Corporation and	3 * Copyright (c) 1999-2014, International Business Machines Corporation and

4 * others. All Rights Reserved.	4 * others. All Rights Reserved.

5 ********************************************************************/	5 ********************************************************************/

6 /************************************************************************	6 /************************************************************************

7 * Date Name Description	7 * Date Name Description

8 * 12/15/99 Madhu Creation.	8 * 12/15/99 Madhu Creation.

9 * 01/12/2000 Madhu Updated for changed API and added new tests	9 * 01/12/2000 Madhu Updated for changed API and added new tests

10 ************************************************************************/	10 ************************************************************************/

11	11

12 #include "utypeinfo.h" // for 'typeid' to work	12 #include "utypeinfo.h" // for 'typeid' to work

13	13

(...skipping 10 matching lines...) Expand all Loading...
24 #include "unicode/schriter.h"	24 #include "unicode/schriter.h"

25 #include "unicode/uniset.h"	25 #include "unicode/uniset.h"

26 #if !UCONFIG_NO_REGULAR_EXPRESSIONS	26 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

27 #include "unicode/regex.h"	27 #include "unicode/regex.h"

28 #endif	28 #endif

29 #include "unicode/ustring.h"	29 #include "unicode/ustring.h"

30 #include "unicode/utext.h"	30 #include "unicode/utext.h"

31 #include "intltest.h"	31 #include "intltest.h"

32 #include "rbbitst.h"	32 #include "rbbitst.h"

33 #include <string.h>	33 #include <string.h>

	34 #include "charstr.h"

34 #include "uvector.h"	35 #include "uvector.h"

35 #include "uvectr32.h"	36 #include "uvectr32.h"

36 #include <string.h>

37 #include <stdio.h>	37 #include <stdio.h>

38 #include <stdlib.h>	38 #include <stdlib.h>

39 #include "unicode/numfmt.h"	39 #include "unicode/numfmt.h"

40 #include "unicode/uscript.h"	40 #include "unicode/uscript.h"

41	41

42 #define TEST_ASSERT(x) {if (!(x)) { \	42 #define TEST_ASSERT(x) {if (!(x)) { \

43 errln("Failure in file %s, line %d", __FILE__, __LINE__);}}	43 errln("Failure in file %s, line %d", __FILE__, __LINE__);}}

44	44

45 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \	45 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \

46 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__ , __LINE__, u_errorName(errcode));}}	46 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__ , __LINE__, u_errorName(errcode));}}

(...skipping 300 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
347 errln("FAIL: break at %d, expected tag %d, got tag %d\n", pos, brkStatus[i], tag);	347 errln("FAIL: break at %d, expected tag %d, got tag %d\n", pos, brkStatus[i], tag);

348 break;	348 break;

349 }	349 }

350 i++;	350 i++;

351 }	351 }

352 }	352 }

353 delete bi;	353 delete bi;

354 }	354 }

355	355

356	356

357 static void printStringBreaks(UnicodeString ustr, int expected[],	357 static void printStringBreaks(UText *tstr, int expected[], int expectedCount) {

358 int expectedcount)

359 {

360 UErrorCode status = U_ZERO_ERROR;	358 UErrorCode status = U_ZERO_ERROR;

361 char name[100];	359 char name[100];

362 printf("code alpha extend alphanum type word sent line name\n");	360 printf("code alpha extend alphanum type word sent line name\n");

363 int j;	361 int nextExpectedIndex = 0;

364 for (j = 0; j < ustr.length(); j ++) {	362 utext_setNativeIndex(tstr, 0);

365 if (expectedcount > 0) {	363 for (int j = 0; j < utext_nativeLength(tstr); j=utext_getNativeIndex(tstr)) {

366 int k;	364 if (nextExpectedIndex < expectedCount && j >= expected[nextExpectedIndex ] ) {

367 for (k = 0; k < expectedcount; k ++) {	365 printf("------------------------------------------------ %d\n", j);

368 if (j == expected[k]) {	366 ++nextExpectedIndex;

369 printf("------------------------------------------------ %d\ n",

370 j);

371 }

372 }

373 }	367 }

374 UChar32 c = ustr.char32At(j);	368

375 if (c > 0xffff) {	369 UChar32 c = utext_next32(tstr);

376 j ++;

377 }

378 u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status);	370 u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status);

379 printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c,	371 printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c,

380 u_isUAlphabetic(c),	372 u_isUAlphabetic(c),

381 u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND),	373 u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND),

382 u_isalnum(c),	374 u_isalnum(c),

383 u_getPropertyValueName(UCHAR_GENERAL_CATEGORY,	375 u_getPropertyValueName(UCHAR_GENERAL_CATEGORY,

384 u_charType(c),	376 u_charType(c),

385 U_SHORT_PROPERTY_NAME),	377 U_SHORT_PROPERTY_NAME),

386 u_getPropertyValueName(UCHAR_WORD_BREAK,	378 u_getPropertyValueName(UCHAR_WORD_BREAK,

387 u_getIntPropertyValue(c,	379 u_getIntPropertyValue(c,

388 UCHAR_WORD_BREAK),	380 UCHAR_WORD_BREAK),

389 U_SHORT_PROPERTY_NAME),	381 U_SHORT_PROPERTY_NAME),

390 u_getPropertyValueName(UCHAR_SENTENCE_BREAK,	382 u_getPropertyValueName(UCHAR_SENTENCE_BREAK,

391 u_getIntPropertyValue(c,	383 u_getIntPropertyValue(c,

392 UCHAR_SENTENCE_BREAK),	384 UCHAR_SENTENCE_BREAK),

393 U_SHORT_PROPERTY_NAME),	385 U_SHORT_PROPERTY_NAME),

394 u_getPropertyValueName(UCHAR_LINE_BREAK,	386 u_getPropertyValueName(UCHAR_LINE_BREAK,

395 u_getIntPropertyValue(c,	387 u_getIntPropertyValue(c,

396 UCHAR_LINE_BREAK),	388 UCHAR_LINE_BREAK),

397 U_SHORT_PROPERTY_NAME),	389 U_SHORT_PROPERTY_NAME),

398 name);	390 name);

399 }	391 }

400 }	392 }

401	393

402	394

	395 static void printStringBreaks(const UnicodeString &ustr, int expected[], int expectedCount) {

	396 UErrorCode status = U_ZERO_ERROR;

	397 UText *tstr = NULL;

	398 tstr = utext_openConstUnicodeString(NULL, &ustr, &status);

	399 if (U_FAILURE(status)) {

	400 printf("printStringBreaks, utext_openConstUnicodeString() returns %s\n", u_errorName(status));

	401 return;

	402 }

	403 printStringBreaks(tstr, expected, expectedCount);

	404 utext_close(tstr);

	405 }

	406

	407

403 void RBBITest::TestBug3818() {	408 void RBBITest::TestBug3818() {

404 UErrorCode status = U_ZERO_ERROR;	409 UErrorCode status = U_ZERO_ERROR;

405	410

406 // Four Thai words...	411 // Four Thai words...

407 static const UChar thaiWordData[] = { 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48,	412 static const UChar thaiWordData[] = { 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48,

408 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48, 0 };	413 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48, 0 };

409 UnicodeString thaiStr(thaiWordData);	414 UnicodeString thaiStr(thaiWordData);

410	415

411 BreakIterator* bi = BreakIterator::createWordInstance(Locale("th"), status);	416 BreakIterator* bi = BreakIterator::createWordInstance(Locale("th"), status);

412 if (U_FAILURE(status) || bi == NULL) {	417 if (U_FAILURE(status) || bi == NULL) {

(...skipping 410 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
823	828

824	829

825	830

826 //------------------------------------------------------------------------------	831 //------------------------------------------------------------------------------

827 //	832 //

828 // RBBITest::Extended Run RBBI Tests from an external test data file	833 // RBBITest::Extended Run RBBI Tests from an external test data file

829 //	834 //

830 //------------------------------------------------------------------------------	835 //------------------------------------------------------------------------------

831	836

832 struct TestParams {	837 struct TestParams {

833 BreakIterator *bi;	838 BreakIterator *bi; // Break iterator is set while parsing test source.

834 UnicodeString dataToBreak;	839 // Changed out whenever test data changes break type.

835 UVector32 *expectedBreaks;	840

836 UVector32 *srcLine;	841 UnicodeString dataToBreak; // Data that is built up while parsing the test.

	842 UVector32 *expectedBreaks; // Expected break positions, matches dataToBreak UnicodeString.

	843 UVector32 *srcLine; // Positions in source file, indexed same as dataToBreak.

837 UVector32 *srcCol;	844 UVector32 *srcCol;

	845

	846 UText *textToBreak; // UText, could be UTF8 or UTF16.

	847 UVector32 *textMap; // Map from UTF-16 dataToBreak offsets to UText offsets.

	848 CharString utf8String; // UTF-8 form of text to break.

	849

	850 TestParams(UErrorCode &status) : dataToBreak() {

	851 bi = NULL;

	852 expectedBreaks = new UVector32(status);

	853 srcLine = new UVector32(status);

	854 srcCol = new UVector32(status);

	855 textToBreak = NULL;

	856 textMap = new UVector32(status);

	857 }

	858

	859 ~TestParams() {

	860 delete bi;

	861 delete expectedBreaks;

	862 delete srcLine;

	863 delete srcCol;

	864 utext_close(textToBreak);

	865 delete textMap;

	866 }

	867

	868 int32_t getSrcLine(int32_t bp);

	869 int32_t getExpectedBreak(int32_t bp);

	870 int32_t getSrcCol(int32_t bp);

	871

	872 void setUTF16(UErrorCode &status);

	873 void setUTF8(UErrorCode &status);

838 };	874 };

839	875

840 void RBBITest::executeTest(TestParams *t) {	876 // Append a UnicodeString to a CharString with UTF-8 encoding.

	877 // Substitute any invalid chars.

	878 // Note: this is used with test data that includes a few unpaired surrogates in the UTF-16 that will be substituted.

	879 static void CharStringAppend(CharString &dest, const UnicodeString &src, UErrorCode &status) {

	880 if (U_FAILURE(status)) {

	881 return;

	882 }

	883 int32_t utf8Length;

	884 u_strToUTF8WithSub(NULL, 0, &utf8Length, // Output Buffer, NULL for preflight.

	885 src.getBuffer(), src.length(), // UTF-16 data

	886 0xfffd, NULL, // Substitution char, number of subs.

	887 &status);

	888 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {

	889 return;

	890 }

	891 status = U_ZERO_ERROR;

	892 int32_t capacity;

	893 char *buffer = dest.getAppendBuffer(utf8Length, utf8Length, capacity, status );

	894 u_strToUTF8WithSub(buffer, utf8Length, NULL,

	895 src.getBuffer(), src.length(),

	896 0xfffd, NULL, &status);

	897 dest.append(buffer, utf8Length, status);

	898 }

	899

	900

	901 void TestParams::setUTF16(UErrorCode &status) {

	902 textToBreak = utext_openUnicodeString(textToBreak, &dataToBreak, &status);

	903 textMap->removeAllElements();

	904 for (int32_t i=0; i<dataToBreak.length(); i++) {

	905 if (i == dataToBreak.getChar32Start(i)) {

	906 textMap->addElement(i, status);

	907 } else {

	908 textMap->addElement(-1, status);

	909 }

	910 }

	911 textMap->addElement(dataToBreak.length(), status);

	912 U_ASSERT(dataToBreak.length() + 1 == textMap->size());

	913 }

	914

	915

	916 void TestParams::setUTF8(UErrorCode &status) {

	917 if (U_FAILURE(status)) {

	918 return;

	919 }

	920 utf8String.clear();

	921 CharStringAppend(utf8String, dataToBreak, status);

	922 textToBreak = utext_openUTF8(textToBreak, utf8String.data(), utf8String.leng th(), &status);

	923 if (U_FAILURE(status)) {

	924 return;

	925 }

	926

	927 textMap->removeAllElements();

	928 int32_t utf16Index = 0;

	929 for (;;) {

	930 textMap->addElement(utf16Index, status);

	931 UChar32 c32 = utext_current32(textToBreak);

	932 if (c32 < 0) {

	933 break;

	934 }

	935 utf16Index += U16_LENGTH(c32);

	936 utext_next32(textToBreak);

	937 while (textMap->size() < utext_getNativeIndex(textToBreak)) {

	938 textMap->addElement(-1, status);

	939 }

	940 }

	941 U_ASSERT(utext_nativeLength(textToBreak) + 1 == textMap->size());

	942 }

	943

	944

	945 int32_t TestParams::getSrcLine(int bp) {

	946 if (bp >= textMap->size()) {

	947 bp = textMap->size() - 1;

	948 }

	949 int32_t i = 0;

	950 for(; bp >= 0 ; --bp) {

	951 // Move to a character boundary if we are not on one already.

	952 i = textMap->elementAti(bp);

	953 if (i >= 0) {

	954 break;

	955 }

	956 }

	957 return srcLine->elementAti(i);

	958 }

	959

	960

	961 int32_t TestParams::getExpectedBreak(int bp) {

	962 if (bp >= textMap->size()) {

	963 return 0;

	964 }

	965 int32_t i = textMap->elementAti(bp);

	966 int32_t retVal = 0;

	967 if (i >= 0) {

	968 retVal = expectedBreaks->elementAti(i);

	969 }

	970 return retVal;

	971 }

	972

	973

	974 int32_t TestParams::getSrcCol(int bp) {

	975 if (bp >= textMap->size()) {

	976 bp = textMap->size() - 1;

	977 }

	978 int32_t i = 0;

	979 for(; bp >= 0; --bp) {

	980 // Move bp to a character boundary if we are not on one already.

	981 i = textMap->elementAti(bp);

	982 if (i >= 0) {

	983 break;

	984 }

	985 }

	986 return srcCol->elementAti(i);

	987 }

	988

	989

	990 void RBBITest::executeTest(TestParams *t, UErrorCode &status) {

841 int32_t bp;	991 int32_t bp;

842 int32_t prevBP;	992 int32_t prevBP;

843 int32_t i;	993 int32_t i;

844	994

	995 TEST_ASSERT_SUCCESS(status);

	996 if (U_FAILURE(status)) {

	997 return;

	998 }

	999

845 if (t->bi == NULL) {	1000 if (t->bi == NULL) {

846 return;	1001 return;

847 }	1002 }

848	1003

849 t->bi->setText(t->dataToBreak);	1004 t->bi->setText(t->textToBreak, status);

850 //	1005 //

851 // Run the iterator forward	1006 // Run the iterator forward

852 //	1007 //

853 prevBP = -1;	1008 prevBP = -1;

854 for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) {	1009 for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) {

855 if (prevBP == bp) {	1010 if (prevBP == bp) {

856 // Fail for lack of forward progress.	1011 // Fail for lack of forward progress.

857 errln("Forward Iteration, no forward progress. Break Pos=%4d File line,col=%4d,%4d",	1012 errln("Forward Iteration, no forward progress. Break Pos=%4d File line,col=%4d,%4d",

858 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));	1013 bp, t->getSrcLine(bp), t->getSrcCol(bp));

859 break;	1014 break;

860 }	1015 }

861	1016

862 // Check that there were we didn't miss an expected break between the last one	1017 // Check that there we didn't miss an expected break between the last one

863 // and this one.	1018 // and this one.

864 for (i=prevBP+1; i<bp; i++) {	1019 for (i=prevBP+1; i<bp; i++) {

865 if (t->expectedBreaks->elementAti(i) != 0) {	1020 if (t->getExpectedBreak(i) != 0) {

866 int expected[] = {0, i};	1021 int expected[] = {0, i};

867 printStringBreaks(t->dataToBreak, expected, 2);	1022 printStringBreaks(t->dataToBreak, expected, 2);

868 errln("Forward Iteration, break expected, but not found. Pos=%4d File line,col= %4d,%4d",	1023 errln("Forward Iteration, break expected, but not found. Pos=%4d File line,col= %4d,%4d",

869 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));	1024 i, t->getSrcLine(i), t->getSrcCol(i));

870 }	1025 }

871 }	1026 }

872	1027

873 // Check that the break we did find was expected	1028 // Check that the break we did find was expected

874 if (t->expectedBreaks->elementAti(bp) == 0) {	1029 if (t->getExpectedBreak(bp) == 0) {

875 int expected[] = {0, bp};	1030 int expected[] = {0, bp};

876 printStringBreaks(t->dataToBreak, expected, 2);	1031 printStringBreaks(t->textToBreak, expected, 2);

877 errln("Forward Iteration, break found, but not expected. Pos=%4d F ile line,col= %4d,%4d",	1032 errln("Forward Iteration, break found, but not expected. Pos=%4d F ile line,col= %4d,%4d",

878 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));	1033 bp, t->getSrcLine(bp), t->getSrcCol(bp));

879 } else {	1034 } else {

880 // The break was expected.	1035 // The break was expected.

881 // Check that the {nnn} tag value is correct.	1036 // Check that the {nnn} tag value is correct.

882 int32_t expectedTagVal = t->expectedBreaks->elementAti(bp);	1037 int32_t expectedTagVal = t->getExpectedBreak(bp);

883 if (expectedTagVal == -1) {	1038 if (expectedTagVal == -1) {

884 expectedTagVal = 0;	1039 expectedTagVal = 0;

885 }	1040 }

886 int32_t line = t->srcLine->elementAti(bp);	1041 int32_t line = t->getSrcLine(bp);

887 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();	1042 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();

888 if (rs != expectedTagVal) {	1043 if (rs != expectedTagVal) {

889 errln("Incorrect status for forward break. Pos=%4d File line,c ol= %4d,%4d.\n"	1044 errln("Incorrect status for forward break. Pos=%4d File line,c ol= %4d,%4d.\n"

890 " Actual, Expected status = %4d, %4d",	1045 " Actual, Expected status = %4d, %4d",

891 bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal);	1046 bp, line, t->getSrcCol(bp), rs, expectedTagVal);

892 }	1047 }

893 }	1048 }

894	1049

895

896 prevBP = bp;	1050 prevBP = bp;

897 }	1051 }

898	1052

899 // Verify that there were no missed expected breaks after the last one found	1053 // Verify that there were no missed expected breaks after the last one found

900 for (i=prevBP+1; i<t->expectedBreaks->size(); i++) {	1054 for (i=prevBP+1; i<utext_nativeLength(t->textToBreak); i++) {

901 if (t->expectedBreaks->elementAti(i) != 0) {	1055 if (t->getExpectedBreak(i) != 0) {

902 errln("Forward Iteration, break expected, but not found. Pos=%4d F ile line,col= %4d,%4d",	1056 errln("Forward Iteration, break expected, but not found. Pos=%4d F ile line,col= %4d,%4d",

903 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));	1057 i, t->getSrcLine(i), t->getSrcCol(i));

904 }	1058 }

905 }	1059 }

906	1060

907 //	1061 //

908 // Run the iterator backwards, verify that the same breaks are found.	1062 // Run the iterator backwards, verify that the same breaks are found.

909 //	1063 //

910 prevBP = t->dataToBreak.length()+2; // start with a phony value for the last break pos seen.	1064 prevBP = utext_nativeLength(t->textToBreak)+2; // start with a phony value for the last break pos seen.

911 for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous()) {	1065 for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous()) {

912 if (prevBP == bp) {	1066 if (prevBP == bp) {

913 // Fail for lack of progress.	1067 // Fail for lack of progress.

914 errln("Reverse Iteration, no progress. Break Pos=%4d File line,col =%4d,%4d",	1068 errln("Reverse Iteration, no progress. Break Pos=%4d File line,col =%4d,%4d",

915 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));	1069 bp, t->getSrcLine(bp), t->getSrcCol(bp));

916 break;	1070 break;

917 }	1071 }

918	1072

919 // Check that there were we didn't miss an expected break between the last one	1073 // Check that we didn't miss an expected break between the last one

920 // and this one. (UVector returns zeros for index out of bounds.)	1074 // and this one. (UVector returns zeros for index out of bounds.)

921 for (i=prevBP-1; i>bp; i--) {	1075 for (i=prevBP-1; i>bp; i--) {

922 if (t->expectedBreaks->elementAti(i) != 0) {	1076 if (t->getExpectedBreak(i) != 0) {

923 errln("Reverse Itertion, break expected, but not found. Pos=%4d File line,col= %4d,%4d",	1077 errln("Reverse Iteration, break expected, but not found. Pos=%4 d File line,col= %4d,%4d",

924 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));	1078 i, t->getSrcLine(i), t->getSrcCol(i));

925 }	1079 }

926 }	1080 }

927	1081

928 // Check that the break we did find was expected	1082 // Check that the break we did find was expected

929 if (t->expectedBreaks->elementAti(bp) == 0) {	1083 if (t->getExpectedBreak(bp) == 0) {

930 errln("Reverse Itertion, break found, but not expected. Pos=%4d Fi le line,col= %4d,%4d",	1084 errln("Reverse Itertion, break found, but not expected. Pos=%4d Fi le line,col= %4d,%4d",

931 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));	1085 bp, t->getSrcLine(bp), t->getSrcCol(bp));

932 } else {	1086 } else {

933 // The break was expected.	1087 // The break was expected.

934 // Check that the {nnn} tag value is correct.	1088 // Check that the {nnn} tag value is correct.

935 int32_t expectedTagVal = t->expectedBreaks->elementAti(bp);	1089 int32_t expectedTagVal = t->getExpectedBreak(bp);

936 if (expectedTagVal == -1) {	1090 if (expectedTagVal == -1) {

937 expectedTagVal = 0;	1091 expectedTagVal = 0;

938 }	1092 }

939 int line = t->srcLine->elementAti(bp);	1093 int line = t->getSrcLine(bp);

940 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();	1094 int32_t rs = t->bi->getRuleStatus();

941 if (rs != expectedTagVal) {	1095 if (rs != expectedTagVal) {

942 errln("Incorrect status for reverse break. Pos=%4d File line,c ol= %4d,%4d.\n"	1096 errln("Incorrect status for reverse break. Pos=%4d File line,c ol= %4d,%4d.\n"

943 " Actual, Expected status = %4d, %4d",	1097 " Actual, Expected status = %4d, %4d",

944 bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal);	1098 bp, line, t->getSrcCol(bp), rs, expectedTagVal);

945 }	1099 }

946 }	1100 }

947	1101

948 prevBP = bp;	1102 prevBP = bp;

949 }	1103 }

950	1104

951 // Verify that there were no missed breaks prior to the last one found	1105 // Verify that there were no missed breaks prior to the last one found

952 for (i=prevBP-1; i>=0; i--) {	1106 for (i=prevBP-1; i>=0; i--) {

953 if (t->expectedBreaks->elementAti(i) != 0) {	1107 if (t->getExpectedBreak(i) != 0) {

954 errln("Forward Itertion, break expected, but not found. Pos=%4d Fi le line,col= %4d,%4d",	1108 errln("Forward Itertion, break expected, but not found. Pos=%4d Fi le line,col= %4d,%4d",

955 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));	1109 i, t->getSrcLine(i), t->getSrcCol(i));

956 }	1110 }

957 }	1111 }

958	1112

959 // Check isBoundary()	1113 // Check isBoundary()

960 for (i=0; i<t->expectedBreaks->size(); i++) {	1114 for (i=0; i < utext_nativeLength(t->textToBreak); i++) {

961 UBool boundaryExpected = (t->expectedBreaks->elementAti(i) != 0);	1115 UBool boundaryExpected = (t->getExpectedBreak(i) != 0);

962 UBool boundaryFound = t->bi->isBoundary(i);	1116 UBool boundaryFound = t->bi->isBoundary(i);

963 if (boundaryExpected != boundaryFound) {	1117 if (boundaryExpected != boundaryFound) {

964 errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n"	1118 errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n"

965 " Expected, Actual= %s, %s",	1119 " Expected, Actual= %s, %s",

966 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i),	1120 i, t->getSrcLine(i), t->getSrcCol(i),

967 boundaryExpected ? "true":"false", boundaryFound? "true" : "false");	1121 boundaryExpected ? "true":"false", boundaryFound? "true" : "false");

968 }	1122 }

969 }	1123 }

970	1124

971 // Check following()	1125 // Check following()

972 for (i=0; i<t->expectedBreaks->size(); i++) {	1126 for (i=0; i < utext_nativeLength(t->textToBreak); i++) {

973 int32_t actualBreak = t->bi->following(i);	1127 int32_t actualBreak = t->bi->following(i);

974 int32_t expectedBreak = BreakIterator::DONE;	1128 int32_t expectedBreak = BreakIterator::DONE;

975 for (int32_t j=i+1; j < t->expectedBreaks->size(); j++) {	1129 for (int32_t j=i+1; j <= utext_nativeLength(t->textToBreak); j++) {

976 if (t->expectedBreaks->elementAti(j) != 0) {	1130 if (t->getExpectedBreak(j) != 0) {

977 expectedBreak = j;	1131 expectedBreak = j;

978 break;	1132 break;

979 }	1133 }

980 }	1134 }

981 if (expectedBreak != actualBreak) {	1135 if (expectedBreak != actualBreak) {

982 errln("following(%d) incorrect. File line,col= %4d,%4d\n"	1136 errln("following(%d) incorrect. File line,col= %4d,%4d\n"

983 " Expected, Actual= %d, %d",	1137 " Expected, Actual= %d, %d",

984 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expect edBreak, actualBreak);	1138 i, t->getSrcLine(i), t->getSrcCol(i), expectedBreak, actualBre ak);

985 }	1139 }

986 }	1140 }

987	1141

988 // Check preceding()	1142 // Check preceding()

989 for (i=t->expectedBreaks->size(); i>=0; i--) {	1143 for (i=utext_nativeLength(t->textToBreak); i>=0; i--) {

990 int32_t actualBreak = t->bi->preceding(i);	1144 int32_t actualBreak = t->bi->preceding(i);

991 int32_t expectedBreak = BreakIterator::DONE;	1145 int32_t expectedBreak = BreakIterator::DONE;

992	1146

993 for (int32_t j=i-1; j >= 0; j--) {	1147 // For UTF-8 & UTF-16 supplementals, all code units of a character are equivalent.

994 if (t->expectedBreaks->elementAti(j) != 0) {	1148 // preceding(trailing byte) will return the index of some preceding code point,

	1149 // not the lead byte of the current code point, even though that has a smaller index.

	1150 // Therefore, start looking at the expected break data not at i-1, but at

	1151 // the start of code point index - 1.

	1152 utext_setNativeIndex(t->textToBreak, i);

	1153 int32_t j = utext_getNativeIndex(t->textToBreak) - 1;

	1154 for (; j >= 0; j--) {

	1155 if (t->getExpectedBreak(j) != 0) {

995 expectedBreak = j;	1156 expectedBreak = j;

996 break;	1157 break;

997 }	1158 }

998 }	1159 }

999 if (expectedBreak != actualBreak) {	1160 if (expectedBreak != actualBreak) {

1000 errln("preceding(%d) incorrect. File line,col= %4d,%4d\n"	1161 errln("preceding(%d) incorrect. File line,col= %4d,%4d\n"

1001 " Expected, Actual= %d, %d",	1162 " Expected, Actual= %d, %d",

1002 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expect edBreak, actualBreak);	1163 i, t->getSrcLine(i), t->getSrcCol(i), expectedBreak, actualBre ak);

1003 }	1164 }

1004 }	1165 }

1005 }	1166 }

1006	1167

1007	1168

1008 void RBBITest::TestExtended() {	1169 void RBBITest::TestExtended() {

1009 #if !UCONFIG_NO_REGULAR_EXPRESSIONS	1170 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

1010 UErrorCode status = U_ZERO_ERROR;	1171 UErrorCode status = U_ZERO_ERROR;

1011 Locale locale("");	1172 Locale locale("");

1012	1173

1013 UnicodeString rules;	1174 UnicodeString rules;

1014 TestParams tp;	1175 TestParams tp(status);

1015 tp.bi = NULL;

1016 tp.expectedBreaks = new UVector32(status);

1017 tp.srcLine = new UVector32(status);

1018 tp.srcCol = new UVector32(status);

1019	1176

1020 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale ([\\p{L}\\p{ Nd}_]) *>"), 0, status);	1177 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale ([\\p{L}\\p{ Nd}_]) *>"), 0, status);

1021 if (U_FAILURE(status)) {	1178 if (U_FAILURE(status)) {

1022 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LI NE__, u_errorName(status));	1179 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LI NE__, u_errorName(status));

1023 }	1180 }

1024	1181

1025	1182

1026 //	1183 //

1027 // Open and read the test data file.	1184 // Open and read the test data file.

1028 //	1185 //

(...skipping 154 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1183 // Add final entry to mappings from break location to source file position.	1340 // Add final entry to mappings from break location to source file position.

1184 // Need one extra because last break position returned is after the	1341 // Need one extra because last break position returned is after the

1185 // last char in the data, not at the last char.	1342 // last char in the data, not at the last char.

1186 tp.srcLine->addElement(lineNum, status);	1343 tp.srcLine->addElement(lineNum, status);

1187 tp.srcCol ->addElement(column, status);	1344 tp.srcCol ->addElement(column, status);

1188	1345

1189 parseState = PARSE_TAG;	1346 parseState = PARSE_TAG;

1190 charIdx += 6;	1347 charIdx += 6;

1191	1348

1192 // RUN THE TEST!	1349 // RUN THE TEST!

1193 executeTest(&tp);	1350 status = U_ZERO_ERROR;

	1351 tp.setUTF16(status);

	1352 executeTest(&tp, status);

	1353 TEST_ASSERT_SUCCESS(status);

	1354

	1355 // Run again, this time with UTF-8 text wrapped in a UText.

	1356 status = U_ZERO_ERROR;

	1357 tp.setUTF8(status);

	1358 TEST_ASSERT_SUCCESS(status);

	1359 executeTest(&tp, status);

1194 break;	1360 break;

1195 }	1361 }

1196	1362

1197 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) {	1363 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) {

1198 // Named character, e.g. \N{COMBINING GRAVE ACCENT}	1364 // Named character, e.g. \N{COMBINING GRAVE ACCENT}

1199 // Get the code point from the name and insert it into the test data.	1365 // Get the code point from the name and insert it into the test data.

1200 // (Damn, no API takes names in Unicode !!!	1366 // (Damn, no API takes names in Unicode !!!

1201 // we've got to take it back to char *)	1367 // we've got to take it back to char *)

1202 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, charIdx);	1368 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, charIdx);

1203 int32_t nameLength = nameEndIdx - (charIdx+2);	1369 int32_t nameLength = nameEndIdx - (charIdx+2);

(...skipping 145 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1349 if (U_FAILURE(status)) {	1515 if (U_FAILURE(status)) {

1350 dataerrln("ICU Error %s while parsing test file at line %d.",	1516 dataerrln("ICU Error %s while parsing test file at line %d.",

1351 u_errorName(status), lineNum);	1517 u_errorName(status), lineNum);

1352 status = U_ZERO_ERROR;	1518 status = U_ZERO_ERROR;

1353 goto end_test; // Stop the test	1519 goto end_test; // Stop the test

1354 }	1520 }

1355	1521

1356 }	1522 }

1357	1523

1358 end_test:	1524 end_test:

1359 delete tp.bi;

1360 delete tp.expectedBreaks;

1361 delete tp.srcLine;

1362 delete tp.srcCol;

1363 delete [] testFile;	1525 delete [] testFile;

1364 #endif	1526 #endif

1365 }	1527 }

1366	1528

1367	1529

1368 //------------------------------------------------------------------------------ -	1530 //------------------------------------------------------------------------------ -

1369 //	1531 //

1370 // TestDictRules create a break iterator from source rules that includes a	1532 // TestDictRules create a break iterator from source rules that includes a

1371 // dictionary range. Regression for bug #7130. Source rules	1533 // dictionary range. Regression for bug #7130. Source rules

1372 // do not declare a break iterator type (word, line, sentence, etc.	1534 // do not declare a break iterator type (word, line, sentence, etc.

(...skipping 2965 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4338 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode);	4500 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode);

4339 if (!prependSet.isEmpty()) {	4501 if (!prependSet.isEmpty()) {

4340 errln(	4502 errln(

4341 "[:GCB=Prepend:] is not empty any more. "	4503 "[:GCB=Prepend:] is not empty any more. "

4342 "Uncomment relevant lines in source/data/brkitr/char.txt and "	4504 "Uncomment relevant lines in source/data/brkitr/char.txt and "

4343 "change this test to the opposite condition.");	4505 "change this test to the opposite condition.");

4344 }	4506 }

4345 }	4507 }

4346	4508

4347 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */	4509 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */

OLD	NEW

« no previous file with comments | « source/test/intltest/rbbitst.h ('k') | source/test/intltest/regcoll.h » ('j') | no next file with comments »