| OLD | NEW |
| 1 /* | 1 /* |
| 2 ****************************************************************************** | 2 ****************************************************************************** |
| 3 * | 3 * |
| 4 * Copyright (C) 2003-2007, International Business Machines | 4 * Copyright (C) 2003-2007, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
| 6 * | 6 * |
| 7 ****************************************************************************** | 7 ****************************************************************************** |
| 8 * file name: ucnv_ext.c | 8 * file name: ucnv_ext.c |
| 9 * encoding: US-ASCII | 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) | 10 * tab size: 8 (not used) |
| (...skipping 928 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 939 | 939 |
| 940 /* set the error code for unassigned */ | 940 /* set the error code for unassigned */ |
| 941 *pErrorCode=U_INVALID_CHAR_FOUND; | 941 *pErrorCode=U_INVALID_CHAR_FOUND; |
| 942 } | 942 } |
| 943 } | 943 } |
| 944 | 944 |
| 945 static void | 945 static void |
| 946 ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, | 946 ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, |
| 947 const int32_t *cx, | 947 const int32_t *cx, |
| 948 const USetAdder *sa, | 948 const USetAdder *sa, |
| 949 UConverterUnicodeSet which, | 949 UBool useFallback, |
| 950 int32_t minLength, | 950 int32_t minLength, |
| 951 UChar32 c, | 951 UChar32 c, |
| 952 UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, | 952 UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, |
| 953 int32_t sectionIndex, | 953 int32_t sectionIndex, |
| 954 UErrorCode *pErrorCode) { | 954 UErrorCode *pErrorCode) { |
| 955 const UChar *fromUSectionUChars; | 955 const UChar *fromUSectionUChars; |
| 956 const uint32_t *fromUSectionValues; | 956 const uint32_t *fromUSectionValues; |
| 957 | 957 |
| 958 uint32_t value; | 958 uint32_t value; |
| 959 int32_t i, count; | 959 int32_t i, count; |
| 960 | 960 |
| 961 fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+s
ectionIndex; | 961 fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+s
ectionIndex; |
| 962 fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t
)+sectionIndex; | 962 fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t
)+sectionIndex; |
| 963 | 963 |
| 964 /* read first pair of the section */ | 964 /* read first pair of the section */ |
| 965 count=*fromUSectionUChars++; | 965 count=*fromUSectionUChars++; |
| 966 value=*fromUSectionValues++; | 966 value=*fromUSectionValues++; |
| 967 | 967 |
| 968 if( value!=0 && | 968 if( value!=0 && |
| 969 UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) && | 969 (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || useFallback) && |
| 970 UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength | 970 UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength |
| 971 ) { | 971 ) { |
| 972 if(c>=0) { | 972 if(c>=0) { |
| 973 /* add the initial code point */ | 973 /* add the initial code point */ |
| 974 sa->add(sa->set, c); | 974 sa->add(sa->set, c); |
| 975 } else { | 975 } else { |
| 976 /* add the string so far */ | 976 /* add the string so far */ |
| 977 sa->addString(sa->set, s, length); | 977 sa->addString(sa->set, s, length); |
| 978 } | 978 } |
| 979 } | 979 } |
| 980 | 980 |
| 981 for(i=0; i<count; ++i) { | 981 for(i=0; i<count; ++i) { |
| 982 /* append this code unit and recurse or add the string */ | 982 /* append this code unit and recurse or add the string */ |
| 983 s[length]=fromUSectionUChars[i]; | 983 s[length]=fromUSectionUChars[i]; |
| 984 value=fromUSectionValues[i]; | 984 value=fromUSectionValues[i]; |
| 985 | 985 |
| 986 if(value==0) { | 986 if(value==0) { |
| 987 /* no mapping, do nothing */ | 987 /* no mapping, do nothing */ |
| 988 } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { | 988 } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { |
| 989 ucnv_extGetUnicodeSetString( | 989 ucnv_extGetUnicodeSetString( |
| 990 sharedData, cx, sa, which, minLength, | 990 sharedData, cx, sa, useFallback, minLength, |
| 991 U_SENTINEL, s, length+1, | 991 U_SENTINEL, s, length+1, |
| 992 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), | 992 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), |
| 993 pErrorCode); | 993 pErrorCode); |
| 994 } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERV
ED_MASK))== | 994 } else if((useFallback ? |
| 995 UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) && | 995 (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 : |
| 996 ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RE
SERVED_MASK))== |
| 997 UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) && |
| 996 UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength | 998 UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength |
| 997 ) { | 999 ) { |
| 998 sa->addString(sa->set, s, length+1); | 1000 sa->addString(sa->set, s, length+1); |
| 999 } | 1001 } |
| 1000 } | 1002 } |
| 1001 } | 1003 } |
| 1002 | 1004 |
| 1003 U_CFUNC void | 1005 U_CFUNC void |
| 1004 ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, | 1006 ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, |
| 1005 const USetAdder *sa, | 1007 const USetAdder *sa, |
| 1006 UConverterUnicodeSet which, | 1008 UConverterUnicodeSet which, |
| 1009 UConverterSetFilter filter, |
| 1007 UErrorCode *pErrorCode) { | 1010 UErrorCode *pErrorCode) { |
| 1008 const int32_t *cx; | 1011 const int32_t *cx; |
| 1009 const uint16_t *stage12, *stage3, *ps2, *ps3; | 1012 const uint16_t *stage12, *stage3, *ps2, *ps3; |
| 1010 const uint32_t *stage3b; | 1013 const uint32_t *stage3b; |
| 1011 | 1014 |
| 1012 uint32_t value; | 1015 uint32_t value; |
| 1013 int32_t st1, stage1Length, st2, st3, minLength; | 1016 int32_t st1, stage1Length, st2, st3, minLength; |
| 1017 UBool useFallback; |
| 1014 | 1018 |
| 1015 UChar s[UCNV_EXT_MAX_UCHARS]; | 1019 UChar s[UCNV_EXT_MAX_UCHARS]; |
| 1016 UChar32 c; | 1020 UChar32 c; |
| 1017 int32_t length; | 1021 int32_t length; |
| 1018 | 1022 |
| 1019 cx=sharedData->mbcs.extIndexes; | 1023 cx=sharedData->mbcs.extIndexes; |
| 1020 if(cx==NULL) { | 1024 if(cx==NULL) { |
| 1021 return; | 1025 return; |
| 1022 } | 1026 } |
| 1023 | 1027 |
| 1024 stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); | 1028 stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); |
| 1025 stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); | 1029 stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); |
| 1026 stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); | 1030 stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); |
| 1027 | 1031 |
| 1028 stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; | 1032 stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; |
| 1029 | 1033 |
| 1034 useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET); |
| 1035 |
| 1030 /* enumerate the from-Unicode trie table */ | 1036 /* enumerate the from-Unicode trie table */ |
| 1031 c=0; /* keep track of the current code point while enumerating */ | 1037 c=0; /* keep track of the current code point while enumerating */ |
| 1032 | 1038 |
| 1033 if(sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) { | 1039 if(filter==UCNV_SET_FILTER_2022_CN) { |
| 1040 minLength=3; |
| 1041 } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || |
| 1042 filter!=UCNV_SET_FILTER_NONE |
| 1043 ) { |
| 1034 /* DBCS-only, ignore single-byte results */ | 1044 /* DBCS-only, ignore single-byte results */ |
| 1035 minLength=2; | 1045 minLength=2; |
| 1036 } else { | 1046 } else { |
| 1037 minLength=1; | 1047 minLength=1; |
| 1038 } | 1048 } |
| 1039 | 1049 |
| 1040 /* | 1050 /* |
| 1041 * the trie enumeration is almost the same as | 1051 * the trie enumeration is almost the same as |
| 1042 * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 | 1052 * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 |
| 1043 */ | 1053 */ |
| (...skipping 13 matching lines...) Expand all Loading... |
| 1057 * Recurse for partial results. | 1067 * Recurse for partial results. |
| 1058 */ | 1068 */ |
| 1059 do { | 1069 do { |
| 1060 value=stage3b[*ps3++]; | 1070 value=stage3b[*ps3++]; |
| 1061 if(value==0) { | 1071 if(value==0) { |
| 1062 /* no mapping, do nothing */ | 1072 /* no mapping, do nothing */ |
| 1063 } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { | 1073 } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { |
| 1064 length=0; | 1074 length=0; |
| 1065 U16_APPEND_UNSAFE(s, length, c); | 1075 U16_APPEND_UNSAFE(s, length, c); |
| 1066 ucnv_extGetUnicodeSetString( | 1076 ucnv_extGetUnicodeSetString( |
| 1067 sharedData, cx, sa, which, minLength, | 1077 sharedData, cx, sa, useFallback, minLength, |
| 1068 c, s, length, | 1078 c, s, length, |
| 1069 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value
), | 1079 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value
), |
| 1070 pErrorCode); | 1080 pErrorCode); |
| 1071 } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_E
XT_FROM_U_RESERVED_MASK))== | 1081 } else if((useFallback ? |
| 1072 UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) && | 1082 (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 : |
| 1083 ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UC
NV_EXT_FROM_U_RESERVED_MASK))== |
| 1084 UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) && |
| 1073 UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength | 1085 UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength |
| 1074 ) { | 1086 ) { |
| 1087 switch(filter) { |
| 1088 case UCNV_SET_FILTER_2022_CN: |
| 1089 if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCN
V_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) { |
| 1090 continue; |
| 1091 } |
| 1092 break; |
| 1093 case UCNV_SET_FILTER_SJIS: |
| 1094 if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (va
lue=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) { |
| 1095 continue; |
| 1096 } |
| 1097 break; |
| 1098 case UCNV_SET_FILTER_GR94DBCS: |
| 1099 if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && |
| 1100 (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(
value))-0xa1a1)<=(0xfefe - 0xa1a1) && |
| 1101 (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { |
| 1102 continue; |
| 1103 } |
| 1104 break; |
| 1105 case UCNV_SET_FILTER_HZ: |
| 1106 if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && |
| 1107 (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(
value))-0xa1a1)<=(0xfdfe - 0xa1a1) && |
| 1108 (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { |
| 1109 continue; |
| 1110 } |
| 1111 break; |
| 1112 default: |
| 1113 /* |
| 1114 * UCNV_SET_FILTER_NONE, |
| 1115 * or UCNV_SET_FILTER_DBCS_ONLY which is handled
via minLength |
| 1116 */ |
| 1117 break; |
| 1118 } |
| 1075 sa->add(sa->set, c); | 1119 sa->add(sa->set, c); |
| 1076 } | 1120 } |
| 1077 } while((++c&0xf)!=0); | 1121 } while((++c&0xf)!=0); |
| 1078 } else { | 1122 } else { |
| 1079 c+=16; /* empty stage 3 block */ | 1123 c+=16; /* empty stage 3 block */ |
| 1080 } | 1124 } |
| 1081 } | 1125 } |
| 1082 } else { | 1126 } else { |
| 1083 c+=1024; /* empty stage 2 block */ | 1127 c+=1024; /* empty stage 2 block */ |
| 1084 } | 1128 } |
| 1085 } | 1129 } |
| 1086 } | 1130 } |
| 1087 | 1131 |
| 1088 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 1132 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
| OLD | NEW |