Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(70)

Side by Side Diff: source/test/intltest/rbbitst.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unusued directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/test/intltest/rbbitst.h ('k') | source/test/intltest/regcoll.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /******************************************************************** 1 /********************************************************************
2 * COPYRIGHT: 2 * COPYRIGHT:
3 * Copyright (c) 1999-2013, International Business Machines Corporation and 3 * Copyright (c) 1999-2014, International Business Machines Corporation and
4 * others. All Rights Reserved. 4 * others. All Rights Reserved.
5 ********************************************************************/ 5 ********************************************************************/
6 /************************************************************************ 6 /************************************************************************
7 * Date Name Description 7 * Date Name Description
8 * 12/15/99 Madhu Creation. 8 * 12/15/99 Madhu Creation.
9 * 01/12/2000 Madhu Updated for changed API and added new tests 9 * 01/12/2000 Madhu Updated for changed API and added new tests
10 ************************************************************************/ 10 ************************************************************************/
11 11
12 #include "utypeinfo.h" // for 'typeid' to work 12 #include "utypeinfo.h" // for 'typeid' to work
13 13
(...skipping 10 matching lines...) Expand all
24 #include "unicode/schriter.h" 24 #include "unicode/schriter.h"
25 #include "unicode/uniset.h" 25 #include "unicode/uniset.h"
26 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 26 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
27 #include "unicode/regex.h" 27 #include "unicode/regex.h"
28 #endif 28 #endif
29 #include "unicode/ustring.h" 29 #include "unicode/ustring.h"
30 #include "unicode/utext.h" 30 #include "unicode/utext.h"
31 #include "intltest.h" 31 #include "intltest.h"
32 #include "rbbitst.h" 32 #include "rbbitst.h"
33 #include <string.h> 33 #include <string.h>
34 #include "charstr.h"
34 #include "uvector.h" 35 #include "uvector.h"
35 #include "uvectr32.h" 36 #include "uvectr32.h"
36 #include <string.h>
37 #include <stdio.h> 37 #include <stdio.h>
38 #include <stdlib.h> 38 #include <stdlib.h>
39 #include "unicode/numfmt.h" 39 #include "unicode/numfmt.h"
40 #include "unicode/uscript.h" 40 #include "unicode/uscript.h"
41 41
42 #define TEST_ASSERT(x) {if (!(x)) { \ 42 #define TEST_ASSERT(x) {if (!(x)) { \
43 errln("Failure in file %s, line %d", __FILE__, __LINE__);}} 43 errln("Failure in file %s, line %d", __FILE__, __LINE__);}}
44 44
45 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \ 45 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \
46 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__ , __LINE__, u_errorName(errcode));}} 46 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__ , __LINE__, u_errorName(errcode));}}
(...skipping 300 matching lines...) Expand 10 before | Expand all | Expand 10 after
347 errln("FAIL: break at %d, expected tag %d, got tag %d\n", pos, brkStatus[i], tag); 347 errln("FAIL: break at %d, expected tag %d, got tag %d\n", pos, brkStatus[i], tag);
348 break; 348 break;
349 } 349 }
350 i++; 350 i++;
351 } 351 }
352 } 352 }
353 delete bi; 353 delete bi;
354 } 354 }
355 355
356 356
357 static void printStringBreaks(UnicodeString ustr, int expected[], 357 static void printStringBreaks(UText *tstr, int expected[], int expectedCount) {
358 int expectedcount)
359 {
360 UErrorCode status = U_ZERO_ERROR; 358 UErrorCode status = U_ZERO_ERROR;
361 char name[100]; 359 char name[100];
362 printf("code alpha extend alphanum type word sent line name\n"); 360 printf("code alpha extend alphanum type word sent line name\n");
363 int j; 361 int nextExpectedIndex = 0;
364 for (j = 0; j < ustr.length(); j ++) { 362 utext_setNativeIndex(tstr, 0);
365 if (expectedcount > 0) { 363 for (int j = 0; j < utext_nativeLength(tstr); j=utext_getNativeIndex(tstr)) {
366 int k; 364 if (nextExpectedIndex < expectedCount && j >= expected[nextExpectedIndex ] ) {
367 for (k = 0; k < expectedcount; k ++) { 365 printf("------------------------------------------------ %d\n", j);
368 if (j == expected[k]) { 366 ++nextExpectedIndex;
369 printf("------------------------------------------------ %d\ n",
370 j);
371 }
372 }
373 } 367 }
374 UChar32 c = ustr.char32At(j); 368
375 if (c > 0xffff) { 369 UChar32 c = utext_next32(tstr);
376 j ++;
377 }
378 u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status); 370 u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status);
379 printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c, 371 printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c,
380 u_isUAlphabetic(c), 372 u_isUAlphabetic(c),
381 u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND), 373 u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND),
382 u_isalnum(c), 374 u_isalnum(c),
383 u_getPropertyValueName(UCHAR_GENERAL_CATEGORY, 375 u_getPropertyValueName(UCHAR_GENERAL_CATEGORY,
384 u_charType(c), 376 u_charType(c),
385 U_SHORT_PROPERTY_NAME), 377 U_SHORT_PROPERTY_NAME),
386 u_getPropertyValueName(UCHAR_WORD_BREAK, 378 u_getPropertyValueName(UCHAR_WORD_BREAK,
387 u_getIntPropertyValue(c, 379 u_getIntPropertyValue(c,
388 UCHAR_WORD_BREAK), 380 UCHAR_WORD_BREAK),
389 U_SHORT_PROPERTY_NAME), 381 U_SHORT_PROPERTY_NAME),
390 u_getPropertyValueName(UCHAR_SENTENCE_BREAK, 382 u_getPropertyValueName(UCHAR_SENTENCE_BREAK,
391 u_getIntPropertyValue(c, 383 u_getIntPropertyValue(c,
392 UCHAR_SENTENCE_BREAK), 384 UCHAR_SENTENCE_BREAK),
393 U_SHORT_PROPERTY_NAME), 385 U_SHORT_PROPERTY_NAME),
394 u_getPropertyValueName(UCHAR_LINE_BREAK, 386 u_getPropertyValueName(UCHAR_LINE_BREAK,
395 u_getIntPropertyValue(c, 387 u_getIntPropertyValue(c,
396 UCHAR_LINE_BREAK), 388 UCHAR_LINE_BREAK),
397 U_SHORT_PROPERTY_NAME), 389 U_SHORT_PROPERTY_NAME),
398 name); 390 name);
399 } 391 }
400 } 392 }
401 393
402 394
395 static void printStringBreaks(const UnicodeString &ustr, int expected[], int exp ectedCount) {
396 UErrorCode status = U_ZERO_ERROR;
397 UText *tstr = NULL;
398 tstr = utext_openConstUnicodeString(NULL, &ustr, &status);
399 if (U_FAILURE(status)) {
400 printf("printStringBreaks, utext_openConstUnicodeString() returns %s\n", u_errorName(status));
401 return;
402 }
403 printStringBreaks(tstr, expected, expectedCount);
404 utext_close(tstr);
405 }
406
407
403 void RBBITest::TestBug3818() { 408 void RBBITest::TestBug3818() {
404 UErrorCode status = U_ZERO_ERROR; 409 UErrorCode status = U_ZERO_ERROR;
405 410
406 // Four Thai words... 411 // Four Thai words...
407 static const UChar thaiWordData[] = { 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0 x0E2B,0x0E0D,0x0E48, 412 static const UChar thaiWordData[] = { 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0 x0E2B,0x0E0D,0x0E48,
408 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0 x0E2B,0x0E0D,0x0E48, 0 }; 413 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0 x0E2B,0x0E0D,0x0E48, 0 };
409 UnicodeString thaiStr(thaiWordData); 414 UnicodeString thaiStr(thaiWordData);
410 415
411 BreakIterator* bi = BreakIterator::createWordInstance(Locale("th"), status); 416 BreakIterator* bi = BreakIterator::createWordInstance(Locale("th"), status);
412 if (U_FAILURE(status) || bi == NULL) { 417 if (U_FAILURE(status) || bi == NULL) {
(...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after
823 828
824 829
825 830
826 //------------------------------------------------------------------------------ 831 //------------------------------------------------------------------------------
827 // 832 //
828 // RBBITest::Extended Run RBBI Tests from an external test data file 833 // RBBITest::Extended Run RBBI Tests from an external test data file
829 // 834 //
830 //------------------------------------------------------------------------------ 835 //------------------------------------------------------------------------------
831 836
832 struct TestParams { 837 struct TestParams {
833 BreakIterator *bi; 838 BreakIterator *bi; // Break iterator is set while parsing test source.
834 UnicodeString dataToBreak; 839 // Changed out whenever test data changes break type.
835 UVector32 *expectedBreaks; 840
836 UVector32 *srcLine; 841 UnicodeString dataToBreak; // Data that is built up while parsing the test.
842 UVector32 *expectedBreaks; // Expected break positions, matches dataToBreak UnicodeString.
843 UVector32 *srcLine; // Positions in source file, indexed same as dataToBreak.
837 UVector32 *srcCol; 844 UVector32 *srcCol;
845
846 UText *textToBreak; // UText, could be UTF8 or UTF16.
847 UVector32 *textMap; // Map from UTF-16 dataToBreak offsets to UText offsets.
848 CharString utf8String; // UTF-8 form of text to break.
849
850 TestParams(UErrorCode &status) : dataToBreak() {
851 bi = NULL;
852 expectedBreaks = new UVector32(status);
853 srcLine = new UVector32(status);
854 srcCol = new UVector32(status);
855 textToBreak = NULL;
856 textMap = new UVector32(status);
857 }
858
859 ~TestParams() {
860 delete bi;
861 delete expectedBreaks;
862 delete srcLine;
863 delete srcCol;
864 utext_close(textToBreak);
865 delete textMap;
866 }
867
868 int32_t getSrcLine(int32_t bp);
869 int32_t getExpectedBreak(int32_t bp);
870 int32_t getSrcCol(int32_t bp);
871
872 void setUTF16(UErrorCode &status);
873 void setUTF8(UErrorCode &status);
838 }; 874 };
839 875
840 void RBBITest::executeTest(TestParams *t) { 876 // Append a UnicodeString to a CharString with UTF-8 encoding.
877 // Substitute any invalid chars.
878 // Note: this is used with test data that includes a few unpaired surrogates in the UTF-16 that will be substituted.
879 static void CharStringAppend(CharString &dest, const UnicodeString &src, UErrorC ode &status) {
880 if (U_FAILURE(status)) {
881 return;
882 }
883 int32_t utf8Length;
884 u_strToUTF8WithSub(NULL, 0, &utf8Length, // Output Buffer, NULL f or preflight.
885 src.getBuffer(), src.length(), // UTF-16 data
886 0xfffd, NULL, // Substitution char, nu mber of subs.
887 &status);
888 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
889 return;
890 }
891 status = U_ZERO_ERROR;
892 int32_t capacity;
893 char *buffer = dest.getAppendBuffer(utf8Length, utf8Length, capacity, status );
894 u_strToUTF8WithSub(buffer, utf8Length, NULL,
895 src.getBuffer(), src.length(),
896 0xfffd, NULL, &status);
897 dest.append(buffer, utf8Length, status);
898 }
899
900
901 void TestParams::setUTF16(UErrorCode &status) {
902 textToBreak = utext_openUnicodeString(textToBreak, &dataToBreak, &status);
903 textMap->removeAllElements();
904 for (int32_t i=0; i<dataToBreak.length(); i++) {
905 if (i == dataToBreak.getChar32Start(i)) {
906 textMap->addElement(i, status);
907 } else {
908 textMap->addElement(-1, status);
909 }
910 }
911 textMap->addElement(dataToBreak.length(), status);
912 U_ASSERT(dataToBreak.length() + 1 == textMap->size());
913 }
914
915
916 void TestParams::setUTF8(UErrorCode &status) {
917 if (U_FAILURE(status)) {
918 return;
919 }
920 utf8String.clear();
921 CharStringAppend(utf8String, dataToBreak, status);
922 textToBreak = utext_openUTF8(textToBreak, utf8String.data(), utf8String.leng th(), &status);
923 if (U_FAILURE(status)) {
924 return;
925 }
926
927 textMap->removeAllElements();
928 int32_t utf16Index = 0;
929 for (;;) {
930 textMap->addElement(utf16Index, status);
931 UChar32 c32 = utext_current32(textToBreak);
932 if (c32 < 0) {
933 break;
934 }
935 utf16Index += U16_LENGTH(c32);
936 utext_next32(textToBreak);
937 while (textMap->size() < utext_getNativeIndex(textToBreak)) {
938 textMap->addElement(-1, status);
939 }
940 }
941 U_ASSERT(utext_nativeLength(textToBreak) + 1 == textMap->size());
942 }
943
944
945 int32_t TestParams::getSrcLine(int bp) {
946 if (bp >= textMap->size()) {
947 bp = textMap->size() - 1;
948 }
949 int32_t i = 0;
950 for(; bp >= 0 ; --bp) {
951 // Move to a character boundary if we are not on one already.
952 i = textMap->elementAti(bp);
953 if (i >= 0) {
954 break;
955 }
956 }
957 return srcLine->elementAti(i);
958 }
959
960
961 int32_t TestParams::getExpectedBreak(int bp) {
962 if (bp >= textMap->size()) {
963 return 0;
964 }
965 int32_t i = textMap->elementAti(bp);
966 int32_t retVal = 0;
967 if (i >= 0) {
968 retVal = expectedBreaks->elementAti(i);
969 }
970 return retVal;
971 }
972
973
974 int32_t TestParams::getSrcCol(int bp) {
975 if (bp >= textMap->size()) {
976 bp = textMap->size() - 1;
977 }
978 int32_t i = 0;
979 for(; bp >= 0; --bp) {
980 // Move bp to a character boundary if we are not on one already.
981 i = textMap->elementAti(bp);
982 if (i >= 0) {
983 break;
984 }
985 }
986 return srcCol->elementAti(i);
987 }
988
989
990 void RBBITest::executeTest(TestParams *t, UErrorCode &status) {
841 int32_t bp; 991 int32_t bp;
842 int32_t prevBP; 992 int32_t prevBP;
843 int32_t i; 993 int32_t i;
844 994
995 TEST_ASSERT_SUCCESS(status);
996 if (U_FAILURE(status)) {
997 return;
998 }
999
845 if (t->bi == NULL) { 1000 if (t->bi == NULL) {
846 return; 1001 return;
847 } 1002 }
848 1003
849 t->bi->setText(t->dataToBreak); 1004 t->bi->setText(t->textToBreak, status);
850 // 1005 //
851 // Run the iterator forward 1006 // Run the iterator forward
852 // 1007 //
853 prevBP = -1; 1008 prevBP = -1;
854 for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) { 1009 for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) {
855 if (prevBP == bp) { 1010 if (prevBP == bp) {
856 // Fail for lack of forward progress. 1011 // Fail for lack of forward progress.
857 errln("Forward Iteration, no forward progress. Break Pos=%4d File line,col=%4d,%4d", 1012 errln("Forward Iteration, no forward progress. Break Pos=%4d File line,col=%4d,%4d",
858 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 1013 bp, t->getSrcLine(bp), t->getSrcCol(bp));
859 break; 1014 break;
860 } 1015 }
861 1016
862 // Check that there were we didn't miss an expected break between the last one 1017 // Check that there we didn't miss an expected break between the last one
863 // and this one. 1018 // and this one.
864 for (i=prevBP+1; i<bp; i++) { 1019 for (i=prevBP+1; i<bp; i++) {
865 if (t->expectedBreaks->elementAti(i) != 0) { 1020 if (t->getExpectedBreak(i) != 0) {
866 int expected[] = {0, i}; 1021 int expected[] = {0, i};
867 printStringBreaks(t->dataToBreak, expected, 2); 1022 printStringBreaks(t->dataToBreak, expected, 2);
868 errln("Forward Iteration, break expected, but not found. Pos=%4 d File line,col= %4d,%4d", 1023 errln("Forward Iteration, break expected, but not found. Pos=%4 d File line,col= %4d,%4d",
869 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 1024 i, t->getSrcLine(i), t->getSrcCol(i));
870 } 1025 }
871 } 1026 }
872 1027
873 // Check that the break we did find was expected 1028 // Check that the break we did find was expected
874 if (t->expectedBreaks->elementAti(bp) == 0) { 1029 if (t->getExpectedBreak(bp) == 0) {
875 int expected[] = {0, bp}; 1030 int expected[] = {0, bp};
876 printStringBreaks(t->dataToBreak, expected, 2); 1031 printStringBreaks(t->textToBreak, expected, 2);
877 errln("Forward Iteration, break found, but not expected. Pos=%4d F ile line,col= %4d,%4d", 1032 errln("Forward Iteration, break found, but not expected. Pos=%4d F ile line,col= %4d,%4d",
878 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 1033 bp, t->getSrcLine(bp), t->getSrcCol(bp));
879 } else { 1034 } else {
880 // The break was expected. 1035 // The break was expected.
881 // Check that the {nnn} tag value is correct. 1036 // Check that the {nnn} tag value is correct.
882 int32_t expectedTagVal = t->expectedBreaks->elementAti(bp); 1037 int32_t expectedTagVal = t->getExpectedBreak(bp);
883 if (expectedTagVal == -1) { 1038 if (expectedTagVal == -1) {
884 expectedTagVal = 0; 1039 expectedTagVal = 0;
885 } 1040 }
886 int32_t line = t->srcLine->elementAti(bp); 1041 int32_t line = t->getSrcLine(bp);
887 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); 1042 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();
888 if (rs != expectedTagVal) { 1043 if (rs != expectedTagVal) {
889 errln("Incorrect status for forward break. Pos=%4d File line,c ol= %4d,%4d.\n" 1044 errln("Incorrect status for forward break. Pos=%4d File line,c ol= %4d,%4d.\n"
890 " Actual, Expected status = %4d, %4d", 1045 " Actual, Expected status = %4d, %4d",
891 bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal); 1046 bp, line, t->getSrcCol(bp), rs, expectedTagVal);
892 } 1047 }
893 } 1048 }
894 1049
895
896 prevBP = bp; 1050 prevBP = bp;
897 } 1051 }
898 1052
899 // Verify that there were no missed expected breaks after the last one found 1053 // Verify that there were no missed expected breaks after the last one found
900 for (i=prevBP+1; i<t->expectedBreaks->size(); i++) { 1054 for (i=prevBP+1; i<utext_nativeLength(t->textToBreak); i++) {
901 if (t->expectedBreaks->elementAti(i) != 0) { 1055 if (t->getExpectedBreak(i) != 0) {
902 errln("Forward Iteration, break expected, but not found. Pos=%4d F ile line,col= %4d,%4d", 1056 errln("Forward Iteration, break expected, but not found. Pos=%4d F ile line,col= %4d,%4d",
903 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 1057 i, t->getSrcLine(i), t->getSrcCol(i));
904 } 1058 }
905 } 1059 }
906 1060
907 // 1061 //
908 // Run the iterator backwards, verify that the same breaks are found. 1062 // Run the iterator backwards, verify that the same breaks are found.
909 // 1063 //
910 prevBP = t->dataToBreak.length()+2; // start with a phony value for the last break pos seen. 1064 prevBP = utext_nativeLength(t->textToBreak)+2; // start with a phony value for the last break pos seen.
911 for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous()) { 1065 for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous()) {
912 if (prevBP == bp) { 1066 if (prevBP == bp) {
913 // Fail for lack of progress. 1067 // Fail for lack of progress.
914 errln("Reverse Iteration, no progress. Break Pos=%4d File line,col =%4d,%4d", 1068 errln("Reverse Iteration, no progress. Break Pos=%4d File line,col =%4d,%4d",
915 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 1069 bp, t->getSrcLine(bp), t->getSrcCol(bp));
916 break; 1070 break;
917 } 1071 }
918 1072
919 // Check that there were we didn't miss an expected break between the last one 1073 // Check that we didn't miss an expected break between the last one
920 // and this one. (UVector returns zeros for index out of bounds.) 1074 // and this one. (UVector returns zeros for index out of bounds.)
921 for (i=prevBP-1; i>bp; i--) { 1075 for (i=prevBP-1; i>bp; i--) {
922 if (t->expectedBreaks->elementAti(i) != 0) { 1076 if (t->getExpectedBreak(i) != 0) {
923 errln("Reverse Itertion, break expected, but not found. Pos=%4d File line,col= %4d,%4d", 1077 errln("Reverse Iteration, break expected, but not found. Pos=%4 d File line,col= %4d,%4d",
924 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 1078 i, t->getSrcLine(i), t->getSrcCol(i));
925 } 1079 }
926 } 1080 }
927 1081
928 // Check that the break we did find was expected 1082 // Check that the break we did find was expected
929 if (t->expectedBreaks->elementAti(bp) == 0) { 1083 if (t->getExpectedBreak(bp) == 0) {
930 errln("Reverse Itertion, break found, but not expected. Pos=%4d Fi le line,col= %4d,%4d", 1084 errln("Reverse Itertion, break found, but not expected. Pos=%4d Fi le line,col= %4d,%4d",
931 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 1085 bp, t->getSrcLine(bp), t->getSrcCol(bp));
932 } else { 1086 } else {
933 // The break was expected. 1087 // The break was expected.
934 // Check that the {nnn} tag value is correct. 1088 // Check that the {nnn} tag value is correct.
935 int32_t expectedTagVal = t->expectedBreaks->elementAti(bp); 1089 int32_t expectedTagVal = t->getExpectedBreak(bp);
936 if (expectedTagVal == -1) { 1090 if (expectedTagVal == -1) {
937 expectedTagVal = 0; 1091 expectedTagVal = 0;
938 } 1092 }
939 int line = t->srcLine->elementAti(bp); 1093 int line = t->getSrcLine(bp);
940 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); 1094 int32_t rs = t->bi->getRuleStatus();
941 if (rs != expectedTagVal) { 1095 if (rs != expectedTagVal) {
942 errln("Incorrect status for reverse break. Pos=%4d File line,c ol= %4d,%4d.\n" 1096 errln("Incorrect status for reverse break. Pos=%4d File line,c ol= %4d,%4d.\n"
943 " Actual, Expected status = %4d, %4d", 1097 " Actual, Expected status = %4d, %4d",
944 bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal); 1098 bp, line, t->getSrcCol(bp), rs, expectedTagVal);
945 } 1099 }
946 } 1100 }
947 1101
948 prevBP = bp; 1102 prevBP = bp;
949 } 1103 }
950 1104
951 // Verify that there were no missed breaks prior to the last one found 1105 // Verify that there were no missed breaks prior to the last one found
952 for (i=prevBP-1; i>=0; i--) { 1106 for (i=prevBP-1; i>=0; i--) {
953 if (t->expectedBreaks->elementAti(i) != 0) { 1107 if (t->getExpectedBreak(i) != 0) {
954 errln("Forward Itertion, break expected, but not found. Pos=%4d Fi le line,col= %4d,%4d", 1108 errln("Forward Itertion, break expected, but not found. Pos=%4d Fi le line,col= %4d,%4d",
955 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 1109 i, t->getSrcLine(i), t->getSrcCol(i));
956 } 1110 }
957 } 1111 }
958 1112
959 // Check isBoundary() 1113 // Check isBoundary()
960 for (i=0; i<t->expectedBreaks->size(); i++) { 1114 for (i=0; i < utext_nativeLength(t->textToBreak); i++) {
961 UBool boundaryExpected = (t->expectedBreaks->elementAti(i) != 0); 1115 UBool boundaryExpected = (t->getExpectedBreak(i) != 0);
962 UBool boundaryFound = t->bi->isBoundary(i); 1116 UBool boundaryFound = t->bi->isBoundary(i);
963 if (boundaryExpected != boundaryFound) { 1117 if (boundaryExpected != boundaryFound) {
964 errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n" 1118 errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n"
965 " Expected, Actual= %s, %s", 1119 " Expected, Actual= %s, %s",
966 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), 1120 i, t->getSrcLine(i), t->getSrcCol(i),
967 boundaryExpected ? "true":"false", boundaryFound? "true" : "fa lse"); 1121 boundaryExpected ? "true":"false", boundaryFound? "true" : "fa lse");
968 } 1122 }
969 } 1123 }
970 1124
971 // Check following() 1125 // Check following()
972 for (i=0; i<t->expectedBreaks->size(); i++) { 1126 for (i=0; i < utext_nativeLength(t->textToBreak); i++) {
973 int32_t actualBreak = t->bi->following(i); 1127 int32_t actualBreak = t->bi->following(i);
974 int32_t expectedBreak = BreakIterator::DONE; 1128 int32_t expectedBreak = BreakIterator::DONE;
975 for (int32_t j=i+1; j < t->expectedBreaks->size(); j++) { 1129 for (int32_t j=i+1; j <= utext_nativeLength(t->textToBreak); j++) {
976 if (t->expectedBreaks->elementAti(j) != 0) { 1130 if (t->getExpectedBreak(j) != 0) {
977 expectedBreak = j; 1131 expectedBreak = j;
978 break; 1132 break;
979 } 1133 }
980 } 1134 }
981 if (expectedBreak != actualBreak) { 1135 if (expectedBreak != actualBreak) {
982 errln("following(%d) incorrect. File line,col= %4d,%4d\n" 1136 errln("following(%d) incorrect. File line,col= %4d,%4d\n"
983 " Expected, Actual= %d, %d", 1137 " Expected, Actual= %d, %d",
984 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expect edBreak, actualBreak); 1138 i, t->getSrcLine(i), t->getSrcCol(i), expectedBreak, actualBre ak);
985 } 1139 }
986 } 1140 }
987 1141
988 // Check preceding() 1142 // Check preceding()
989 for (i=t->expectedBreaks->size(); i>=0; i--) { 1143 for (i=utext_nativeLength(t->textToBreak); i>=0; i--) {
990 int32_t actualBreak = t->bi->preceding(i); 1144 int32_t actualBreak = t->bi->preceding(i);
991 int32_t expectedBreak = BreakIterator::DONE; 1145 int32_t expectedBreak = BreakIterator::DONE;
992 1146
993 for (int32_t j=i-1; j >= 0; j--) { 1147 // For UTF-8 & UTF-16 supplementals, all code units of a character are equivalent.
994 if (t->expectedBreaks->elementAti(j) != 0) { 1148 // preceding(trailing byte) will return the index of some preceding code point,
1149 // not the lead byte of the current code point, even though that has a smaller index.
1150 // Therefore, start looking at the expected break data not at i-1, but at
1151 // the start of code point index - 1.
1152 utext_setNativeIndex(t->textToBreak, i);
1153 int32_t j = utext_getNativeIndex(t->textToBreak) - 1;
1154 for (; j >= 0; j--) {
1155 if (t->getExpectedBreak(j) != 0) {
995 expectedBreak = j; 1156 expectedBreak = j;
996 break; 1157 break;
997 } 1158 }
998 } 1159 }
999 if (expectedBreak != actualBreak) { 1160 if (expectedBreak != actualBreak) {
1000 errln("preceding(%d) incorrect. File line,col= %4d,%4d\n" 1161 errln("preceding(%d) incorrect. File line,col= %4d,%4d\n"
1001 " Expected, Actual= %d, %d", 1162 " Expected, Actual= %d, %d",
1002 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expect edBreak, actualBreak); 1163 i, t->getSrcLine(i), t->getSrcCol(i), expectedBreak, actualBre ak);
1003 } 1164 }
1004 } 1165 }
1005 } 1166 }
1006 1167
1007 1168
1008 void RBBITest::TestExtended() { 1169 void RBBITest::TestExtended() {
1009 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 1170 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
1010 UErrorCode status = U_ZERO_ERROR; 1171 UErrorCode status = U_ZERO_ERROR;
1011 Locale locale(""); 1172 Locale locale("");
1012 1173
1013 UnicodeString rules; 1174 UnicodeString rules;
1014 TestParams tp; 1175 TestParams tp(status);
1015 tp.bi = NULL;
1016 tp.expectedBreaks = new UVector32(status);
1017 tp.srcLine = new UVector32(status);
1018 tp.srcCol = new UVector32(status);
1019 1176
1020 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{ Nd}_]*) *>"), 0, status); 1177 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{ Nd}_]*) *>"), 0, status);
1021 if (U_FAILURE(status)) { 1178 if (U_FAILURE(status)) {
1022 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LI NE__, u_errorName(status)); 1179 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LI NE__, u_errorName(status));
1023 } 1180 }
1024 1181
1025 1182
1026 // 1183 //
1027 // Open and read the test data file. 1184 // Open and read the test data file.
1028 // 1185 //
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after
1183 // Add final entry to mappings from break location to source file position. 1340 // Add final entry to mappings from break location to source file position.
1184 // Need one extra because last break position returned is after the 1341 // Need one extra because last break position returned is after the
1185 // last char in the data, not at the last char. 1342 // last char in the data, not at the last char.
1186 tp.srcLine->addElement(lineNum, status); 1343 tp.srcLine->addElement(lineNum, status);
1187 tp.srcCol ->addElement(column, status); 1344 tp.srcCol ->addElement(column, status);
1188 1345
1189 parseState = PARSE_TAG; 1346 parseState = PARSE_TAG;
1190 charIdx += 6; 1347 charIdx += 6;
1191 1348
1192 // RUN THE TEST! 1349 // RUN THE TEST!
1193 executeTest(&tp); 1350 status = U_ZERO_ERROR;
1351 tp.setUTF16(status);
1352 executeTest(&tp, status);
1353 TEST_ASSERT_SUCCESS(status);
1354
1355 // Run again, this time with UTF-8 text wrapped in a UText.
1356 status = U_ZERO_ERROR;
1357 tp.setUTF8(status);
1358 TEST_ASSERT_SUCCESS(status);
1359 executeTest(&tp, status);
1194 break; 1360 break;
1195 } 1361 }
1196 1362
1197 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) { 1363 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) {
1198 // Named character, e.g. \N{COMBINING GRAVE ACCENT} 1364 // Named character, e.g. \N{COMBINING GRAVE ACCENT}
1199 // Get the code point from the name and insert it into the test data. 1365 // Get the code point from the name and insert it into the test data.
1200 // (Damn, no API takes names in Unicode !!! 1366 // (Damn, no API takes names in Unicode !!!
1201 // we've got to take it back to char *) 1367 // we've got to take it back to char *)
1202 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, char Idx); 1368 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, char Idx);
1203 int32_t nameLength = nameEndIdx - (charIdx+2); 1369 int32_t nameLength = nameEndIdx - (charIdx+2);
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after
1349 if (U_FAILURE(status)) { 1515 if (U_FAILURE(status)) {
1350 dataerrln("ICU Error %s while parsing test file at line %d.", 1516 dataerrln("ICU Error %s while parsing test file at line %d.",
1351 u_errorName(status), lineNum); 1517 u_errorName(status), lineNum);
1352 status = U_ZERO_ERROR; 1518 status = U_ZERO_ERROR;
1353 goto end_test; // Stop the test 1519 goto end_test; // Stop the test
1354 } 1520 }
1355 1521
1356 } 1522 }
1357 1523
1358 end_test: 1524 end_test:
1359 delete tp.bi;
1360 delete tp.expectedBreaks;
1361 delete tp.srcLine;
1362 delete tp.srcCol;
1363 delete [] testFile; 1525 delete [] testFile;
1364 #endif 1526 #endif
1365 } 1527 }
1366 1528
1367 1529
1368 //------------------------------------------------------------------------------ - 1530 //------------------------------------------------------------------------------ -
1369 // 1531 //
1370 // TestDictRules create a break iterator from source rules that includes a 1532 // TestDictRules create a break iterator from source rules that includes a
1371 // dictionary range. Regression for bug #7130. Source rules 1533 // dictionary range. Regression for bug #7130. Source rules
1372 // do not declare a break iterator type (word, line, sentence, etc. 1534 // do not declare a break iterator type (word, line, sentence, etc.
(...skipping 2965 matching lines...) Expand 10 before | Expand all | Expand 10 after
4338 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode); 4500 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode);
4339 if (!prependSet.isEmpty()) { 4501 if (!prependSet.isEmpty()) {
4340 errln( 4502 errln(
4341 "[:GCB=Prepend:] is not empty any more. " 4503 "[:GCB=Prepend:] is not empty any more. "
4342 "Uncomment relevant lines in source/data/brkitr/char.txt and " 4504 "Uncomment relevant lines in source/data/brkitr/char.txt and "
4343 "change this test to the opposite condition."); 4505 "change this test to the opposite condition.");
4344 } 4506 }
4345 } 4507 }
4346 4508
4347 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 4509 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
OLDNEW
« no previous file with comments | « source/test/intltest/rbbitst.h ('k') | source/test/intltest/regcoll.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698