OLD | NEW |
1 /******************************************************************** | 1 /******************************************************************** |
2 * COPYRIGHT: | 2 * COPYRIGHT: |
3 * Copyright (c) 1997-2014, International Business Machines Corporation and | 3 * Copyright (c) 1997-2015, International Business Machines Corporation and |
4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
5 ********************************************************************/ | 5 ********************************************************************/ |
6 /***************************************************************************** | 6 /***************************************************************************** |
7 * | 7 * |
8 * File CU_CAPITST.C | 8 * File ccapitst.c |
9 * | 9 * |
10 * Modification History: | 10 * Modification History: |
11 * Name Description | 11 * Name Description |
12 * Madhu Katragadda Ported for C API | 12 * Madhu Katragadda Ported for C API |
13 ****************************************************************************** | 13 ****************************************************************************** |
14 */ | 14 */ |
15 #include <stdio.h> | 15 #include <stdio.h> |
16 #include <stdlib.h> | 16 #include <stdlib.h> |
17 #include <string.h> | 17 #include <string.h> |
18 #include <ctype.h> | 18 #include <ctype.h> |
(...skipping 1123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1142 */ | 1142 */ |
1143 static void TestAlias() { | 1143 static void TestAlias() { |
1144 int32_t i, ncnv; | 1144 int32_t i, ncnv; |
1145 UErrorCode status = U_ZERO_ERROR; | 1145 UErrorCode status = U_ZERO_ERROR; |
1146 | 1146 |
1147 /* Predetermined aliases that we expect to map back to ISO_2022 | 1147 /* Predetermined aliases that we expect to map back to ISO_2022 |
1148 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */ | 1148 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */ |
1149 const char* ISO_2022_NAMES[] = | 1149 const char* ISO_2022_NAMES[] = |
1150 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", | 1150 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", |
1151 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; | 1151 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; |
1152 int32_t ISO_2022_NAMES_LENGTH = | 1152 int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES); |
1153 sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]); | |
1154 const char *UTF8_NAMES[] = | 1153 const char *UTF8_NAMES[] = |
1155 { "UTF-8", "utf-8", "utf8", "ibm-1208", | 1154 { "UTF-8", "utf-8", "utf8", "ibm-1208", |
1156 "utf_8", "ibm1208", "cp1208" }; | 1155 "utf_8", "ibm1208", "cp1208" }; |
1157 int32_t UTF8_NAMES_LENGTH = | 1156 int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES); |
1158 sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]); | |
1159 | 1157 |
1160 struct { | 1158 struct { |
1161 const char *name; | 1159 const char *name; |
1162 const char *alias; | 1160 const char *alias; |
1163 } CONVERTERS_NAMES[] = { | 1161 } CONVERTERS_NAMES[] = { |
1164 { "UTF-32BE", "UTF32_BigEndian" }, | 1162 { "UTF-32BE", "UTF32_BigEndian" }, |
1165 { "UTF-32LE", "UTF32_LittleEndian" }, | 1163 { "UTF-32LE", "UTF32_LittleEndian" }, |
1166 { "UTF-32", "ISO-10646-UCS-4" }, | 1164 { "UTF-32", "ISO-10646-UCS-4" }, |
1167 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, | 1165 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, |
1168 { "UTF-32", "ucs-4" } | 1166 { "UTF-32", "ucs-4" } |
(...skipping 20 matching lines...) Expand all Loading... |
1189 cnv = ucnv_open(name, &status); | 1187 cnv = ucnv_open(name, &status); |
1190 if (U_FAILURE(status)) { | 1188 if (U_FAILURE(status)) { |
1191 log_data_err("FAIL: Converter \"%s\" (i=%d)" | 1189 log_data_err("FAIL: Converter \"%s\" (i=%d)" |
1192 " can't be opened.\n", | 1190 " can't be opened.\n", |
1193 name, i); | 1191 name, i); |
1194 } | 1192 } |
1195 else { | 1193 else { |
1196 if (strcmp(ucnv_getName(cnv, &status), name) != 0 | 1194 if (strcmp(ucnv_getName(cnv, &status), name) != 0 |
1197 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "Opposit
eEndian") == 0)) { | 1195 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "Opposit
eEndian") == 0)) { |
1198 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " | 1196 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " |
1199 "The should be the same\n", | 1197 "They should be the same\n", |
1200 name, ucnv_getName(cnv, &status)); | 1198 name, ucnv_getName(cnv, &status)); |
1201 } | 1199 } |
1202 } | 1200 } |
1203 ucnv_close(cnv); | 1201 ucnv_close(cnv); |
1204 | 1202 |
1205 status = U_ZERO_ERROR; | 1203 status = U_ZERO_ERROR; |
1206 alias0 = ucnv_getAlias(name, 0, &status); | 1204 alias0 = ucnv_getAlias(name, 0, &status); |
1207 for (j=1; j<na; ++j) { | 1205 for (j=1; j<na; ++j) { |
1208 const char *alias; | 1206 const char *alias; |
1209 /* Make sure each alias maps back to the same list of | 1207 /* Make sure each alias maps back to the same list of |
(...skipping 785 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1995 } | 1993 } |
1996 } | 1994 } |
1997 #endif | 1995 #endif |
1998 } | 1996 } |
1999 | 1997 |
2000 /* bug2: pre-flighting loop bug: simple overflow causes bug */ | 1998 /* bug2: pre-flighting loop bug: simple overflow causes bug */ |
2001 static void bug2() | 1999 static void bug2() |
2002 { | 2000 { |
2003 /* US-ASCII "1234567890" */ | 2001 /* US-ASCII "1234567890" */ |
2004 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39 }; | 2002 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39 }; |
| 2003 #if !UCONFIG_ONLY_HTML_CONVERSION |
2005 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (
char)0xef, (char)0x80, (char)0x80 }; | 2004 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (
char)0xef, (char)0x80, (char)0x80 }; |
2006 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30, | 2005 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30, |
2007 0x00, 0x00, 0x00, 0x31, | 2006 0x00, 0x00, 0x00, 0x31, |
2008 0x00, 0x00, 0x00, 0x32, | 2007 0x00, 0x00, 0x00, 0x32, |
2009 0x00, 0x00, 0x00, 0x33, | 2008 0x00, 0x00, 0x00, 0x33, |
2010 0x00, 0x00, 0x00, 0x34, | 2009 0x00, 0x00, 0x00, 0x34, |
2011 0x00, 0x00, 0x00, 0x35, | 2010 0x00, 0x00, 0x00, 0x35, |
2012 0x00, 0x00, 0x00, 0x36, | 2011 0x00, 0x00, 0x00, 0x36, |
2013 0x00, 0x00, 0x00, 0x37, | 2012 0x00, 0x00, 0x00, 0x37, |
2014 0x00, 0x00, 0x00, 0x38, | 2013 0x00, 0x00, 0x00, 0x38, |
2015 0x00, 0x00, (char)0xf0, 0x00}; | 2014 0x00, 0x00, (char)0xf0, 0x00}; |
| 2015 #endif |
| 2016 |
2016 static char target[5]; | 2017 static char target[5]; |
2017 | 2018 |
2018 UErrorCode err = U_ZERO_ERROR; | 2019 UErrorCode err = U_ZERO_ERROR; |
2019 int32_t size; | 2020 int32_t size; |
2020 | 2021 |
2021 /* do the conversion */ | 2022 /* do the conversion */ |
2022 size = ucnv_convert("iso-8859-1", /* out */ | 2023 size = ucnv_convert("iso-8859-1", /* out */ |
2023 "us-ascii", /* in */ | 2024 "us-ascii", /* in */ |
2024 target, | 2025 target, |
2025 sizeof(target), | 2026 sizeof(target), |
2026 source, | 2027 source, |
2027 sizeof(source), | 2028 sizeof(source), |
2028 &err); | 2029 &err); |
2029 | 2030 |
2030 if ( size != 10 ) { | 2031 if ( size != 10 ) { |
2031 /* bug2: size is 5, should be 10 */ | 2032 /* bug2: size is 5, should be 10 */ |
2032 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting si
ze %d instead of 10\n", size); | 2033 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting si
ze %d instead of 10\n", size); |
2033 } | 2034 } |
2034 | 2035 |
| 2036 #if !UCONFIG_ONLY_HTML_CONVERSION |
2035 err = U_ZERO_ERROR; | 2037 err = U_ZERO_ERROR; |
2036 /* do the conversion */ | 2038 /* do the conversion */ |
2037 size = ucnv_convert("UTF-32BE", /* out */ | 2039 size = ucnv_convert("UTF-32BE", /* out */ |
2038 "UTF-8", /* in */ | 2040 "UTF-8", /* in */ |
2039 target, | 2041 target, |
2040 sizeof(target), | 2042 sizeof(target), |
2041 sourceUTF8, | 2043 sourceUTF8, |
2042 sizeof(sourceUTF8), | 2044 sizeof(sourceUTF8), |
2043 &err); | 2045 &err); |
2044 | 2046 |
2045 if ( size != 32 ) { | 2047 if ( size != 32 ) { |
2046 /* bug2: size is 5, should be 32 */ | 2048 /* bug2: size is 5, should be 32 */ |
2047 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d inst
ead of 32\n", size); | 2049 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d inst
ead of 32\n", size); |
2048 } | 2050 } |
2049 | 2051 |
2050 err = U_ZERO_ERROR; | 2052 err = U_ZERO_ERROR; |
2051 /* do the conversion */ | 2053 /* do the conversion */ |
2052 size = ucnv_convert("UTF-8", /* out */ | 2054 size = ucnv_convert("UTF-8", /* out */ |
2053 "UTF-32BE", /* in */ | 2055 "UTF-32BE", /* in */ |
2054 target, | 2056 target, |
2055 sizeof(target), | 2057 sizeof(target), |
2056 sourceUTF32, | 2058 sourceUTF32, |
2057 sizeof(sourceUTF32), | 2059 sizeof(sourceUTF32), |
2058 &err); | 2060 &err); |
2059 | 2061 |
2060 if ( size != 12 ) { | 2062 if ( size != 12 ) { |
2061 /* bug2: size is 5, should be 12 */ | 2063 /* bug2: size is 5, should be 12 */ |
2062 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d inst
ead of 12\n", size); | 2064 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d inst
ead of 12\n", size); |
2063 } | 2065 } |
| 2066 #endif |
2064 } | 2067 } |
2065 | 2068 |
2066 /* | 2069 /* |
2067 * bug3: when the characters expand going from source to target codepage | 2070 * bug3: when the characters expand going from source to target codepage |
2068 * you get bug3 in addition to bug2 | 2071 * you get bug3 in addition to bug2 |
2069 */ | 2072 */ |
2070 static void bug3() | 2073 static void bug3() |
2071 { | 2074 { |
2072 #if !UCONFIG_NO_LEGACY_CONVERSION | 2075 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
2073 char char_in[CHUNK_SIZE*4]; | 2076 char char_in[CHUNK_SIZE*4]; |
2074 char target[5]; | 2077 char target[5]; |
2075 UErrorCode err = U_ZERO_ERROR; | 2078 UErrorCode err = U_ZERO_ERROR; |
2076 int32_t size; | 2079 int32_t size; |
2077 | 2080 |
2078 /* | 2081 /* |
2079 * first get the buggy size from bug2 then | 2082 * first get the buggy size from bug2 then |
2080 * compare it to buggy size with an expansion | 2083 * compare it to buggy size with an expansion |
2081 */ | 2084 */ |
2082 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */ | 2085 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */ |
(...skipping 663 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2746 }, | 2749 }, |
2747 /*errorTarget[]={*/ | 2750 /*errorTarget[]={*/ |
2748 /* | 2751 /* |
2749 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: | 2752 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: |
2750 * SUB, SUB, 0x40, SUB, SUB, 0x40 | 2753 * SUB, SUB, 0x40, SUB, SUB, 0x40 |
2751 */ | 2754 */ |
2752 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ | 2755 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ |
2753 /*},*/ | 2756 /*},*/ |
2754 utf16[]={ | 2757 utf16[]={ |
2755 0xfe, 0xff /* BOM only, no text */ | 2758 0xfe, 0xff /* BOM only, no text */ |
2756 }, | 2759 }; |
2757 utf32[]={ | 2760 #if !UCONFIG_ONLY_HTML_CONVERSION |
| 2761 static const uint8_t utf32[]={ |
2758 0xff, 0xfe, 0, 0 /* BOM only, no text */ | 2762 0xff, 0xfe, 0, 0 /* BOM only, no text */ |
2759 }; | 2763 }; |
| 2764 #endif |
2760 | 2765 |
2761 char target[100], utf8NUL[100], shiftJISNUL[100]; | 2766 char target[100], utf8NUL[100], shiftJISNUL[100]; |
2762 | 2767 |
2763 UConverter *cnv; | 2768 UConverter *cnv; |
2764 UErrorCode errorCode; | 2769 UErrorCode errorCode; |
2765 | 2770 |
2766 int32_t length; | 2771 int32_t length; |
2767 | 2772 |
2768 errorCode=U_ZERO_ERROR; | 2773 errorCode=U_ZERO_ERROR; |
2769 cnv=ucnv_open("Shift-JIS", &errorCode); | 2774 cnv=ucnv_open("Shift-JIS", &errorCode); |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2819 /* pseudo-empty string, no target buffer */ | 2824 /* pseudo-empty string, no target buffer */ |
2820 errorCode=U_ZERO_ERROR; | 2825 errorCode=U_ZERO_ERROR; |
2821 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16,
2, &errorCode); | 2826 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16,
2, &errorCode); |
2822 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || | 2827 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || |
2823 length!=0 | 2828 length!=0 |
2824 ) { | 2829 ) { |
2825 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s ex
pect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", | 2830 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s ex
pect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", |
2826 u_errorName(errorCode), length); | 2831 u_errorName(errorCode), length); |
2827 } | 2832 } |
2828 | 2833 |
| 2834 #if !UCONFIG_ONLY_HTML_CONVERSION |
2829 errorCode=U_ZERO_ERROR; | 2835 errorCode=U_ZERO_ERROR; |
2830 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32,
4, &errorCode); | 2836 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32,
4, &errorCode); |
2831 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || | 2837 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || |
2832 length!=0 | 2838 length!=0 |
2833 ) { | 2839 ) { |
2834 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s ex
pect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", | 2840 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s ex
pect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", |
2835 u_errorName(errorCode), length); | 2841 u_errorName(errorCode), length); |
2836 } | 2842 } |
| 2843 #endif |
2837 | 2844 |
2838 /* bad arguments */ | 2845 /* bad arguments */ |
2839 errorCode=U_MESSAGE_PARSE_ERROR; | 2846 errorCode=U_MESSAGE_PARSE_ERROR; |
2840 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16,
2, &errorCode); | 2847 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16,
2, &errorCode); |
2841 if(errorCode!=U_MESSAGE_PARSE_ERROR) { | 2848 if(errorCode!=U_MESSAGE_PARSE_ERROR) { |
2842 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_error
Name(errorCode)); | 2849 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_error
Name(errorCode)); |
2843 } | 2850 } |
2844 | 2851 |
2845 /* source==NULL */ | 2852 /* source==NULL */ |
2846 errorCode=U_ZERO_ERROR; | 2853 errorCode=U_ZERO_ERROR; |
(...skipping 574 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3421 | 3428 |
3422 log_verbose("getDefaultName returned %s\n", defaultName); | 3429 log_verbose("getDefaultName returned %s\n", defaultName); |
3423 | 3430 |
3424 /*change the default name by setting it */ | 3431 /*change the default name by setting it */ |
3425 TestOneDefaultNameChange("UTF-8", "UTF-8"); | 3432 TestOneDefaultNameChange("UTF-8", "UTF-8"); |
3426 #if U_CHARSET_IS_UTF8 | 3433 #if U_CHARSET_IS_UTF8 |
3427 TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); | 3434 TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); |
3428 TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); | 3435 TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); |
3429 TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); | 3436 TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); |
3430 #else | 3437 #else |
3431 # if !UCONFIG_NO_LEGACY_CONVERSION | 3438 # if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
3432 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); | 3439 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); |
3433 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); | 3440 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); |
3434 # endif | 3441 # endif |
3435 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); | 3442 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); |
3436 #endif | 3443 #endif |
3437 | 3444 |
3438 /*set the default name back*/ | 3445 /*set the default name back*/ |
3439 ucnv_setDefaultName(defaultName); | 3446 ucnv_setDefaultName(defaultName); |
3440 } | 3447 } |
3441 | 3448 |
(...skipping 259 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3701 } | 3708 } |
3702 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &er
rorCode); | 3709 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &er
rorCode); |
3703 | 3710 |
3704 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1,
length)) { | 3711 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1,
length)) { |
3705 log_err("unexpected %s BOM writing behavior -- %s\n", | 3712 log_err("unexpected %s BOM writing behavior -- %s\n", |
3706 names[i], u_errorName(errorCode)); | 3713 names[i], u_errorName(errorCode)); |
3707 } | 3714 } |
3708 ucnv_close(cnv); | 3715 ucnv_close(cnv); |
3709 } | 3716 } |
3710 } | 3717 } |
OLD | NEW |