OLD | NEW |
1 /* | 1 /* |
2 ****************************************************************************** | 2 ****************************************************************************** |
3 * | 3 * |
4 * Copyright (C) 2002-2011, International Business Machines | 4 * Copyright (C) 2002-2014, International Business Machines |
5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
6 * | 6 * |
7 ****************************************************************************** | 7 ****************************************************************************** |
8 * file name: bocu1tst.c | 8 * file name: bocu1tst.c |
9 * encoding: US-ASCII | 9 * encoding: US-ASCII |
10 * tab size: 8 (not used) | 10 * tab size: 8 (not used) |
11 * indentation:4 | 11 * indentation:4 |
12 * | 12 * |
13 * created on: 2002may27 | 13 * created on: 2002may27 |
14 * created by: Markus W. Scherer | 14 * created by: Markus W. Scherer |
(...skipping 13 matching lines...) Expand all Loading... |
28 * ### links in design doc to here and to ucnvbocu.c | 28 * ### links in design doc to here and to ucnvbocu.c |
29 */ | 29 */ |
30 | 30 |
31 #include "unicode/utypes.h" | 31 #include "unicode/utypes.h" |
32 #include "unicode/ustring.h" | 32 #include "unicode/ustring.h" |
33 #include "unicode/ucnv.h" | 33 #include "unicode/ucnv.h" |
34 #include "unicode/utf16.h" | 34 #include "unicode/utf16.h" |
35 #include "cmemory.h" | 35 #include "cmemory.h" |
36 #include "cintltst.h" | 36 #include "cintltst.h" |
37 | 37 |
38 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) | |
39 | |
40 /* icuhtml/design/conversion/bocu1/bocu1.h ---------------------------------- */ | 38 /* icuhtml/design/conversion/bocu1/bocu1.h ---------------------------------- */ |
41 | 39 |
42 /* BOCU-1 constants and macros ---------------------------------------------- */ | 40 /* BOCU-1 constants and macros ---------------------------------------------- */ |
43 | 41 |
44 /* | 42 /* |
45 * BOCU-1 encodes the code points of a Unicode string as | 43 * BOCU-1 encodes the code points of a Unicode string as |
46 * a sequence of byte-encoded differences (slope detection), | 44 * a sequence of byte-encoded differences (slope detection), |
47 * preserving lexical order. | 45 * preserving lexical order. |
48 * | 46 * |
49 * Optimize the difference-taking for runs of Unicode text within | 47 * Optimize the difference-taking for runs of Unicode text within |
(...skipping 894 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
944 static const UChar plane1[]={ 0xd800, 0xdc00 }; | 942 static const UChar plane1[]={ 0xd800, 0xdc00 }; |
945 static const UChar plane2[]={ 0xd845, 0xdddd }; | 943 static const UChar plane2[]={ 0xd845, 0xdddd }; |
946 static const UChar plane15[]={ 0xdbbb, 0xddee, 0x20 }; | 944 static const UChar plane15[]={ 0xdbbb, 0xddee, 0x20 }; |
947 static const UChar plane16[]={ 0xdbff, 0xdfff }; | 945 static const UChar plane16[]={ 0xdbff, 0xdfff }; |
948 static const UChar c0[]={ 1, 0xe40, 0x20, 9 }; | 946 static const UChar c0[]={ 1, 0xe40, 0x20, 9 }; |
949 | 947 |
950 static const struct { | 948 static const struct { |
951 const UChar *s; | 949 const UChar *s; |
952 int32_t length; | 950 int32_t length; |
953 } strings[]={ | 951 } strings[]={ |
954 { feff, LENGTHOF(feff) }, | 952 { feff, UPRV_LENGTHOF(feff) }, |
955 { ascii, LENGTHOF(ascii) }, | 953 { ascii, UPRV_LENGTHOF(ascii) }, |
956 { crlf, LENGTHOF(crlf) }, | 954 { crlf, UPRV_LENGTHOF(crlf) }, |
957 { nul, LENGTHOF(nul) }, | 955 { nul, UPRV_LENGTHOF(nul) }, |
958 { latin, LENGTHOF(latin) }, | 956 { latin, UPRV_LENGTHOF(latin) }, |
959 { devanagari, LENGTHOF(devanagari) }, | 957 { devanagari, UPRV_LENGTHOF(devanagari) }, |
960 { hiragana, LENGTHOF(hiragana) }, | 958 { hiragana, UPRV_LENGTHOF(hiragana) }, |
961 { unihan, LENGTHOF(unihan) }, | 959 { unihan, UPRV_LENGTHOF(unihan) }, |
962 { hangul, LENGTHOF(hangul) }, | 960 { hangul, UPRV_LENGTHOF(hangul) }, |
963 { surrogates, LENGTHOF(surrogates) }, | 961 { surrogates, UPRV_LENGTHOF(surrogates) }, |
964 { plane1, LENGTHOF(plane1) }, | 962 { plane1, UPRV_LENGTHOF(plane1) }, |
965 { plane2, LENGTHOF(plane2) }, | 963 { plane2, UPRV_LENGTHOF(plane2) }, |
966 { plane15, LENGTHOF(plane15) }, | 964 { plane15, UPRV_LENGTHOF(plane15) }, |
967 { plane16, LENGTHOF(plane16) }, | 965 { plane16, UPRV_LENGTHOF(plane16) }, |
968 { c0, LENGTHOF(c0) } | 966 { c0, UPRV_LENGTHOF(c0) } |
969 }; | 967 }; |
970 | 968 |
971 /* | 969 /* |
972 * Verify that the ICU BOCU-1 implementation produces the same results as | 970 * Verify that the ICU BOCU-1 implementation produces the same results as |
973 * the reference implementation from the design folder. | 971 * the reference implementation from the design folder. |
974 * Generate some texts and convert them with both converters, verifying | 972 * Generate some texts and convert them with both converters, verifying |
975 * identical results and roundtripping. | 973 * identical results and roundtripping. |
976 */ | 974 */ |
977 static void | 975 static void |
978 TestBOCU1(void) { | 976 TestBOCU1(void) { |
979 UChar *text; | 977 UChar *text; |
980 int32_t i, length; | 978 int32_t i, length; |
981 | 979 |
982 UConverter *bocu1; | 980 UConverter *bocu1; |
983 UErrorCode errorCode; | 981 UErrorCode errorCode; |
984 | 982 |
985 errorCode=U_ZERO_ERROR; | 983 errorCode=U_ZERO_ERROR; |
986 bocu1=ucnv_open("BOCU-1", &errorCode); | 984 bocu1=ucnv_open("BOCU-1", &errorCode); |
987 if(U_FAILURE(errorCode)) { | 985 if(U_FAILURE(errorCode)) { |
988 log_err("error: unable to open BOCU-1 converter: %s\n", u_errorName(erro
rCode)); | 986 log_err("error: unable to open BOCU-1 converter: %s\n", u_errorName(erro
rCode)); |
989 return; | 987 return; |
990 } | 988 } |
991 | 989 |
992 text = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar)); | 990 text = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar)); |
993 | 991 |
994 /* text 1: each of strings[] once */ | 992 /* text 1: each of strings[] once */ |
995 length=0; | 993 length=0; |
996 for(i=0; i<LENGTHOF(strings); ++i) { | 994 for(i=0; i<UPRV_LENGTHOF(strings); ++i) { |
997 u_memcpy(text+length, strings[i].s, strings[i].length); | 995 u_memcpy(text+length, strings[i].s, strings[i].length); |
998 length+=strings[i].length; | 996 length+=strings[i].length; |
999 } | 997 } |
1000 roundtripBOCU1(bocu1, 1, text, length); | 998 roundtripBOCU1(bocu1, 1, text, length); |
1001 | 999 |
1002 /* text 2: each of strings[] twice */ | 1000 /* text 2: each of strings[] twice */ |
1003 length=0; | 1001 length=0; |
1004 for(i=0; i<LENGTHOF(strings); ++i) { | 1002 for(i=0; i<UPRV_LENGTHOF(strings); ++i) { |
1005 u_memcpy(text+length, strings[i].s, strings[i].length); | 1003 u_memcpy(text+length, strings[i].s, strings[i].length); |
1006 length+=strings[i].length; | 1004 length+=strings[i].length; |
1007 u_memcpy(text+length, strings[i].s, strings[i].length); | 1005 u_memcpy(text+length, strings[i].s, strings[i].length); |
1008 length+=strings[i].length; | 1006 length+=strings[i].length; |
1009 } | 1007 } |
1010 roundtripBOCU1(bocu1, 2, text, length); | 1008 roundtripBOCU1(bocu1, 2, text, length); |
1011 | 1009 |
1012 /* text 3: each of strings[] many times (set step vs. |strings| so that all
strings are used) */ | 1010 /* text 3: each of strings[] many times (set step vs. |strings| so that all
strings are used) */ |
1013 length=0; | 1011 length=0; |
1014 for(i=1; length<5000; i+=7) { | 1012 for(i=1; length<5000; i+=7) { |
1015 if(i>=LENGTHOF(strings)) { | 1013 if(i>=UPRV_LENGTHOF(strings)) { |
1016 i-=LENGTHOF(strings); | 1014 i-=UPRV_LENGTHOF(strings); |
1017 } | 1015 } |
1018 u_memcpy(text+length, strings[i].s, strings[i].length); | 1016 u_memcpy(text+length, strings[i].s, strings[i].length); |
1019 length+=strings[i].length; | 1017 length+=strings[i].length; |
1020 } | 1018 } |
1021 roundtripBOCU1(bocu1, 3, text, length); | 1019 roundtripBOCU1(bocu1, 3, text, length); |
1022 | 1020 |
1023 ucnv_close(bocu1); | 1021 ucnv_close(bocu1); |
1024 free(text); | 1022 free(text); |
1025 } | 1023 } |
1026 | 1024 |
1027 U_CFUNC void addBOCU1Tests(TestNode** root); | 1025 U_CFUNC void addBOCU1Tests(TestNode** root); |
1028 | 1026 |
1029 U_CFUNC void | 1027 U_CFUNC void |
1030 addBOCU1Tests(TestNode** root) { | 1028 addBOCU1Tests(TestNode** root) { |
1031 addTest(root, TestBOCU1RefDiff, "tsconv/bocu1tst/TestBOCU1RefDiff"); | 1029 addTest(root, TestBOCU1RefDiff, "tsconv/bocu1tst/TestBOCU1RefDiff"); |
1032 addTest(root, TestBOCU1, "tsconv/bocu1tst/TestBOCU1"); | 1030 addTest(root, TestBOCU1, "tsconv/bocu1tst/TestBOCU1"); |
1033 } | 1031 } |
OLD | NEW |