OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ***************************************************************************** |
| 3 * |
| 4 * Copyright (C) 1998-2007, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ***************************************************************************** |
| 8 * |
| 9 * ucnv_err.c |
| 10 * Implements error behaviour functions called by T_UConverter_{from,to}Unicode |
| 11 * |
| 12 * |
| 13 * Change history: |
| 14 * |
| 15 * 06/29/2000 helena Major rewrite of the callback APIs. |
| 16 */ |
| 17 |
| 18 #include "unicode/utypes.h" |
| 19 |
| 20 #if !UCONFIG_NO_CONVERSION |
| 21 |
| 22 #include "unicode/ucnv_err.h" |
| 23 #include "unicode/ucnv_cb.h" |
| 24 #include "ucnv_cnv.h" |
| 25 #include "cmemory.h" |
| 26 #include "unicode/ucnv.h" |
| 27 #include "ustrfmt.h" |
| 28 |
/*
 * Capacity, in UChars, of the scratch buffers in which the escape callbacks
 * build their replacement text.
 * 48 = 6 (UChars in the longest per-byte escape, e.g. "&#255;" or "&#xFF;")
 *    * 8 (max number of bytes per char for any converter).
 * The former value of 32 only accounted for the 4-UChar "%XHH" form and was
 * too small for the XML-style to-Unicode escapes.
 */
#define VALUE_STRING_LENGTH 48

/* UTF-16 values of the ASCII characters used to assemble escape sequences. */
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025  /* % */
#define UNICODE_U_CODEPOINT             0x0055  /* U */
#define UNICODE_X_CODEPOINT             0x0058  /* X */
#define UNICODE_RS_CODEPOINT            0x005C  /* \ (reverse solidus) */
#define UNICODE_U_LOW_CODEPOINT         0x0075  /* u */
#define UNICODE_X_LOW_CODEPOINT         0x0078  /* x */
#define UNICODE_AMP_CODEPOINT           0x0026  /* & */
#define UNICODE_HASH_CODEPOINT          0x0023  /* # */
#define UNICODE_SEMICOLON_CODEPOINT     0x003B  /* ; */
#define UNICODE_PLUS_CODEPOINT          0x002B  /* + */
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B  /* { */
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D  /* } */
#define UNICODE_SPACE_CODEPOINT         0x0020  /* space */

/*
 * Values compared against the first char that a callback's context pointer
 * refers to; they select the escape style or the stop-on-illegal behaviour.
 */
#define UCNV_PRV_ESCAPE_ICU      0
#define UCNV_PRV_ESCAPE_C        'C'
#define UCNV_PRV_ESCAPE_XML_DEC  'D'
#define UCNV_PRV_ESCAPE_XML_HEX  'X'
#define UCNV_PRV_ESCAPE_JAVA     'J'
#define UCNV_PRV_ESCAPE_UNICODE  'U'
#define UCNV_PRV_ESCAPE_CSS2     'S'
#define UCNV_PRV_STOP_ON_ILLEGAL 'i'
| 52 |
| 53 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ |
| 54 U_CAPI void U_EXPORT2 |
| 55 UCNV_FROM_U_CALLBACK_STOP ( |
| 56 const void *context, |
| 57 UConverterFromUnicodeArgs *fromUArgs, |
| 58 const UChar* codeUnits, |
| 59 int32_t length, |
| 60 UChar32 codePoint, |
| 61 UConverterCallbackReason reason, |
| 62 UErrorCode * err) |
| 63 { |
| 64 /* the caller must have set the error code accordingly */ |
| 65 return; |
| 66 } |
| 67 |
| 68 |
| 69 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ |
| 70 U_CAPI void U_EXPORT2 |
| 71 UCNV_TO_U_CALLBACK_STOP ( |
| 72 const void *context, |
| 73 UConverterToUnicodeArgs *toUArgs, |
| 74 const char* codePoints, |
| 75 int32_t length, |
| 76 UConverterCallbackReason reason, |
| 77 UErrorCode * err) |
| 78 { |
| 79 /* the caller must have set the error code accordingly */ |
| 80 return; |
| 81 } |
| 82 |
| 83 U_CAPI void U_EXPORT2 |
| 84 UCNV_FROM_U_CALLBACK_SKIP ( |
| 85 const void *context, |
| 86 UConverterFromUnicodeArgs *fromUArgs, |
| 87 const UChar* codeUnits, |
| 88 int32_t length, |
| 89 UChar32 codePoint, |
| 90 UConverterCallbackReason reason, |
| 91 UErrorCode * err) |
| 92 { |
| 93 if (reason <= UCNV_IRREGULAR) |
| 94 { |
| 95 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL &&
reason == UCNV_UNASSIGNED)) |
| 96 { |
| 97 *err = U_ZERO_ERROR; |
| 98 } |
| 99 /* else the caller must have set the error code accordingly. */ |
| 100 } |
| 101 /* else ignore the reset, close and clone calls. */ |
| 102 } |
| 103 |
| 104 U_CAPI void U_EXPORT2 |
| 105 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( |
| 106 const void *context, |
| 107 UConverterFromUnicodeArgs *fromArgs, |
| 108 const UChar* codeUnits, |
| 109 int32_t length, |
| 110 UChar32 codePoint, |
| 111 UConverterCallbackReason reason, |
| 112 UErrorCode * err) |
| 113 { |
| 114 if (reason <= UCNV_IRREGULAR) |
| 115 { |
| 116 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL &&
reason == UCNV_UNASSIGNED)) |
| 117 { |
| 118 *err = U_ZERO_ERROR; |
| 119 ucnv_cbFromUWriteSub(fromArgs, 0, err); |
| 120 } |
| 121 /* else the caller must have set the error code accordingly. */ |
| 122 } |
| 123 /* else ignore the reset, close and clone calls. */ |
| 124 } |
| 125 |
| 126 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, |
| 127 *uses a clean copy (resetted) of the converter, to convert that unicode |
| 128 *escape sequence to the target codepage (if conversion failure happens then |
| 129 *we revert to substituting with subchar) |
| 130 */ |
| 131 U_CAPI void U_EXPORT2 |
| 132 UCNV_FROM_U_CALLBACK_ESCAPE ( |
| 133 const void *context, |
| 134 UConverterFromUnicodeArgs *fromArgs, |
| 135 const UChar *codeUnits, |
| 136 int32_t length, |
| 137 UChar32 codePoint, |
| 138 UConverterCallbackReason reason, |
| 139 UErrorCode * err) |
| 140 { |
| 141 |
| 142 UChar valueString[VALUE_STRING_LENGTH]; |
| 143 int32_t valueStringLength = 0; |
| 144 int32_t i = 0; |
| 145 |
| 146 const UChar *myValueSource = NULL; |
| 147 UErrorCode err2 = U_ZERO_ERROR; |
| 148 UConverterFromUCallback original = NULL; |
| 149 const void *originalContext; |
| 150 |
| 151 UConverterFromUCallback ignoredCallback = NULL; |
| 152 const void *ignoredContext; |
| 153 |
| 154 if (reason > UCNV_IRREGULAR) |
| 155 { |
| 156 return; |
| 157 } |
| 158 |
| 159 ucnv_setFromUCallBack (fromArgs->converter, |
| 160 (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, |
| 161 NULL, |
| 162 &original, |
| 163 &originalContext, |
| 164 &err2); |
| 165 |
| 166 if (U_FAILURE (err2)) |
| 167 { |
| 168 *err = err2; |
| 169 return; |
| 170 } |
| 171 if(context==NULL) |
| 172 { |
| 173 while (i < length) |
| 174 { |
| 175 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOIN
T; /* adding % */ |
| 176 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* addin
g U */ |
| 177 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_S
TRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); |
| 178 } |
| 179 } |
| 180 else |
| 181 { |
| 182 switch(*((char*)context)) |
| 183 { |
| 184 case UCNV_PRV_ESCAPE_JAVA: |
| 185 while (i < length) |
| 186 { |
| 187 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;
/* adding \ */ |
| 188 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT
; /* adding u */ |
| 189 valueStringLength += uprv_itou (valueString + valueStringLength, V
ALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); |
| 190 } |
| 191 break; |
| 192 |
| 193 case UCNV_PRV_ESCAPE_C: |
| 194 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /*
adding \ */ |
| 195 |
| 196 if(length==2){ |
| 197 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /*
adding U */ |
| 198 valueStringLength += uprv_itou (valueString + valueStringLength, V
ALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); |
| 199 |
| 200 } |
| 201 else{ |
| 202 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT
; /* adding u */ |
| 203 valueStringLength += uprv_itou (valueString + valueStringLength, V
ALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); |
| 204 } |
| 205 break; |
| 206 |
| 207 case UCNV_PRV_ESCAPE_XML_DEC: |
| 208 |
| 209 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /*
adding & */ |
| 210 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /*
adding # */ |
| 211 if(length==2){ |
| 212 valueStringLength += uprv_itou (valueString + valueStringLength, V
ALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); |
| 213 } |
| 214 else{ |
| 215 valueStringLength += uprv_itou (valueString + valueStringLength, V
ALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); |
| 216 } |
| 217 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */ |
| 218 break; |
| 219 |
| 220 case UCNV_PRV_ESCAPE_XML_HEX: |
| 221 |
| 222 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /*
adding & */ |
| 223 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /*
adding # */ |
| 224 valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /*
adding x */ |
| 225 if(length==2){ |
| 226 valueStringLength += uprv_itou (valueString + valueStringLength, V
ALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); |
| 227 } |
| 228 else{ |
| 229 valueStringLength += uprv_itou (valueString + valueStringLength, V
ALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); |
| 230 } |
| 231 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */ |
| 232 break; |
| 233 |
| 234 case UCNV_PRV_ESCAPE_UNICODE: |
| 235 valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOIN
T; /* adding { */ |
| 236 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /*
adding U */ |
| 237 valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /*
adding + */ |
| 238 if (length == 2) { |
| 239 valueStringLength += uprv_itou (valueString + valueStringLength, V
ALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); |
| 240 } else { |
| 241 valueStringLength += uprv_itou (valueString + valueStringLength, V
ALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); |
| 242 } |
| 243 valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOI
NT; /* adding } */ |
| 244 break; |
| 245 |
| 246 case UCNV_PRV_ESCAPE_CSS2: |
| 247 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /*
adding \ */ |
| 248 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE
_STRING_LENGTH - valueStringLength, codePoint, 16, 0); |
| 249 /* Always add space character, becase the next character might be whit
espace, |
| 250 which would erroneously be considered the termination of the escape
sequence. */ |
| 251 valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; |
| 252 break; |
| 253 |
| 254 default: |
| 255 while (i < length) |
| 256 { |
| 257 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CO
DEPOINT; /* adding % */ |
| 258 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;
/* adding U */ |
| 259 valueStringLength += uprv_itou (valueString + valueStringLength, V
ALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); |
| 260 } |
| 261 } |
| 262 } |
| 263 myValueSource = valueString; |
| 264 |
| 265 /* reset the error */ |
| 266 *err = U_ZERO_ERROR; |
| 267 |
| 268 ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLen
gth, 0, err); |
| 269 |
| 270 ucnv_setFromUCallBack (fromArgs->converter, |
| 271 original, |
| 272 originalContext, |
| 273 &ignoredCallback, |
| 274 &ignoredContext, |
| 275 &err2); |
| 276 if (U_FAILURE (err2)) |
| 277 { |
| 278 *err = err2; |
| 279 return; |
| 280 } |
| 281 |
| 282 return; |
| 283 } |
| 284 |
| 285 |
| 286 |
| 287 U_CAPI void U_EXPORT2 |
| 288 UCNV_TO_U_CALLBACK_SKIP ( |
| 289 const void *context, |
| 290 UConverterToUnicodeArgs *toArgs, |
| 291 const char* codeUnits, |
| 292 int32_t length, |
| 293 UConverterCallbackReason reason, |
| 294 UErrorCode * err) |
| 295 { |
| 296 if (reason <= UCNV_IRREGULAR) |
| 297 { |
| 298 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL &&
reason == UCNV_UNASSIGNED)) |
| 299 { |
| 300 *err = U_ZERO_ERROR; |
| 301 } |
| 302 /* else the caller must have set the error code accordingly. */ |
| 303 } |
| 304 /* else ignore the reset, close and clone calls. */ |
| 305 } |
| 306 |
| 307 U_CAPI void U_EXPORT2 |
| 308 UCNV_TO_U_CALLBACK_SUBSTITUTE ( |
| 309 const void *context, |
| 310 UConverterToUnicodeArgs *toArgs, |
| 311 const char* codeUnits, |
| 312 int32_t length, |
| 313 UConverterCallbackReason reason, |
| 314 UErrorCode * err) |
| 315 { |
| 316 if (reason <= UCNV_IRREGULAR) |
| 317 { |
| 318 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL &&
reason == UCNV_UNASSIGNED)) |
| 319 { |
| 320 *err = U_ZERO_ERROR; |
| 321 ucnv_cbToUWriteSub(toArgs,0,err); |
| 322 } |
| 323 /* else the caller must have set the error code accordingly. */ |
| 324 } |
| 325 /* else ignore the reset, close and clone calls. */ |
| 326 } |
| 327 |
| 328 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, |
| 329 *and uses that as the substitution sequence |
| 330 */ |
| 331 U_CAPI void U_EXPORT2 |
| 332 UCNV_TO_U_CALLBACK_ESCAPE ( |
| 333 const void *context, |
| 334 UConverterToUnicodeArgs *toArgs, |
| 335 const char* codeUnits, |
| 336 int32_t length, |
| 337 UConverterCallbackReason reason, |
| 338 UErrorCode * err) |
| 339 { |
| 340 UChar uniValueString[VALUE_STRING_LENGTH]; |
| 341 int32_t valueStringLength = 0; |
| 342 int32_t i = 0; |
| 343 |
| 344 if (reason > UCNV_IRREGULAR) |
| 345 { |
| 346 return; |
| 347 } |
| 348 |
| 349 if(context==NULL) |
| 350 { |
| 351 while (i < length) |
| 352 { |
| 353 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_C
ODEPOINT; /* adding % */ |
| 354 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;
/* adding X */ |
| 355 valueStringLength += uprv_itou (uniValueString + valueStringLength,
VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); |
| 356 } |
| 357 } |
| 358 else |
| 359 { |
| 360 switch(*((char*)context)) |
| 361 { |
| 362 case UCNV_PRV_ESCAPE_XML_DEC: |
| 363 while (i < length) |
| 364 { |
| 365 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPO
INT; /* adding & */ |
| 366 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEP
OINT; /* adding # */ |
| 367 valueStringLength += uprv_itou (uniValueString + valueStringLeng
th, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0); |
| 368 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_
CODEPOINT; /* adding ; */ |
| 369 } |
| 370 break; |
| 371 |
| 372 case UCNV_PRV_ESCAPE_XML_HEX: |
| 373 while (i < length) |
| 374 { |
| 375 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPO
INT; /* adding & */ |
| 376 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEP
OINT; /* adding # */ |
| 377 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODE
POINT; /* adding x */ |
| 378 valueStringLength += uprv_itou (uniValueString + valueStringLeng
th, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0); |
| 379 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_
CODEPOINT; /* adding ; */ |
| 380 } |
| 381 break; |
| 382 case UCNV_PRV_ESCAPE_C: |
| 383 while (i < length) |
| 384 { |
| 385 uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOI
NT; /* adding \ */ |
| 386 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODE
POINT; /* adding x */ |
| 387 valueStringLength += uprv_itou (uniValueString + valueStringLeng
th, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2); |
| 388 } |
| 389 break; |
| 390 default: |
| 391 while (i < length) |
| 392 { |
| 393 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SI
GN_CODEPOINT; /* adding % */ |
| 394 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOIN
T; /* adding X */ |
| 395 uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENG
TH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); |
| 396 valueStringLength += 2; |
| 397 } |
| 398 } |
| 399 } |
| 400 /* reset the error */ |
| 401 *err = U_ZERO_ERROR; |
| 402 |
| 403 ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err); |
| 404 } |
| 405 |
| 406 #endif |
OLD | NEW |