OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (C) 2000-2009, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 * file name: ucnvhz.c |
| 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) |
| 9 * indentation:4 |
| 10 * |
| 11 * created on: 2000oct16 |
| 12 * created by: Ram Viswanadha |
| 13 * 10/31/2000 Ram Implemented offsets logic function |
| 14 * |
| 15 */ |
| 16 |
| 17 #include "unicode/utypes.h" |
| 18 |
| 19 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| 20 |
| 21 #include "cmemory.h" |
| 22 #include "unicode/ucnv.h" |
| 23 #include "unicode/ucnv_cb.h" |
| 24 #include "unicode/uset.h" |
| 25 #include "ucnv_bld.h" |
| 26 #include "ucnv_cnv.h" |
| 27 #include "ucnv_imp.h" |
| 28 |
/* Individual bytes that make up the HZ escape sequences. */
#define UCNV_TILDE 0x7E /* ~ */
#define UCNV_OPEN_BRACE 0x7B /* { */
#define UCNV_CLOSE_BRACE 0x7D /* } */
/*
 * Two-byte escape strings written by the from-Unicode path (each ESC_LEN long):
 * SB_ESCAPE ("~}") precedes single-byte output, DB_ESCAPE ("~{") precedes
 * double-byte output, and TILDE_ESCAPE ("~~") stands for a literal '~'.
 */
#define SB_ESCAPE "\x7E\x7D"
#define DB_ESCAPE "\x7E\x7B"
#define TILDE_ESCAPE "\x7E\x7E"
#define ESC_LEN 2
| 36 |
| 37 |
/*
 * Appends `len` bytes of `strToAppend` to the output described by `args`.
 *
 * Bytes that fit (targetIndex < targetLength) are stored at
 * args->target[targetIndex]; when args->offsets is non-NULL the value
 * sourceIndex-1 is recorded through the caller's local `offsets` cursor,
 * which must be in scope at the expansion site. Bytes that do not fit are
 * appended to args->converter->charErrorBuffer and *err is set to
 * U_BUFFER_OVERFLOW_ERROR.
 *
 * Side effects on the arguments: `targetIndex` is advanced by the number of
 * bytes stored in the target, `strToAppend` is advanced past every byte
 * consumed, and `len` is counted down (it ends at -1). All three must
 * therefore be modifiable lvalues.
 *
 * The body is wrapped in do { } while(0) so that an invocation followed by
 * ';' forms exactly one statement and can be used safely as the body of an
 * unbraced if/else.
 */
#define CONCAT_ESCAPE_MACRO(args, targetIndex, targetLength, strToAppend, err, len, sourceIndex) \
    do { \
        while((len)-- > 0){ \
            if((targetIndex) < (targetLength)){ \
                (args)->target[targetIndex] = (unsigned char) *(strToAppend); \
                if((args)->offsets != NULL){ \
                    *(offsets++) = (sourceIndex) - 1; \
                } \
                (targetIndex)++; \
            } \
            else{ \
                (args)->converter->charErrorBuffer[(int)(args)->converter->charErrorBufferLength++] = (unsigned char) *(strToAppend); \
                *(err) = U_BUFFER_OVERFLOW_ERROR; \
            } \
            (strToAppend)++; \
        } \
    } while(0)
| 54 |
| 55 |
/* Per-converter HZ state; an instance is stored in UConverter.extraInfo. */
typedef struct{
    UConverter* gbConverter;   /* sub-converter opened as "GBK"; supplies the double-byte mappings */
    int32_t targetIndex;       /* only cleared by _HZReset in the code visible here */
    int32_t sourceIndex;       /* only cleared by _HZReset in the code visible here */
    UBool isEscapeAppended;    /* from-Unicode: TRUE once a ~{ or ~} escape has been written */
    UBool isStateDBCS;         /* to-Unicode: TRUE after ~{ was read, FALSE after ~} */
    UBool isTargetUCharDBCS;   /* from-Unicode: TRUE while the last output character was double-byte */
    UBool isEmptySegment;      /* to-Unicode: TRUE right after ~{ or ~} until the segment yields something */
}UConverterDataHZ;
| 65 |
| 66 |
| 67 |
| 68 static void |
| 69 _HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
| 70 UConverter *gbConverter; |
| 71 if(pArgs->onlyTestIsLoadable) { |
| 72 ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result
*/ |
| 73 return; |
| 74 } |
| 75 gbConverter = ucnv_open("GBK", errorCode); |
| 76 if(U_FAILURE(*errorCode)) { |
| 77 return; |
| 78 } |
| 79 cnv->toUnicodeStatus = 0; |
| 80 cnv->fromUnicodeStatus= 0; |
| 81 cnv->mode=0; |
| 82 cnv->fromUChar32=0x0000; |
| 83 cnv->extraInfo = uprv_malloc(sizeof(UConverterDataHZ)); |
| 84 if(cnv->extraInfo != NULL){ |
| 85 uprv_memset(cnv->extraInfo, 0, sizeof(UConverterDataHZ)); |
| 86 ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter; |
| 87 } |
| 88 else { |
| 89 ucnv_close(gbConverter); |
| 90 *errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 91 return; |
| 92 } |
| 93 } |
| 94 |
| 95 static void |
| 96 _HZClose(UConverter *cnv){ |
| 97 if(cnv->extraInfo != NULL) { |
| 98 ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); |
| 99 if(!cnv->isExtraLocal) { |
| 100 uprv_free(cnv->extraInfo); |
| 101 } |
| 102 cnv->extraInfo = NULL; |
| 103 } |
| 104 } |
| 105 |
| 106 static void |
| 107 _HZReset(UConverter *cnv, UConverterResetChoice choice){ |
| 108 if(choice<=UCNV_RESET_TO_UNICODE) { |
| 109 cnv->toUnicodeStatus = 0; |
| 110 cnv->mode=0; |
| 111 if(cnv->extraInfo != NULL){ |
| 112 ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; |
| 113 ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; |
| 114 } |
| 115 } |
| 116 if(choice!=UCNV_RESET_TO_UNICODE) { |
| 117 cnv->fromUnicodeStatus= 0; |
| 118 cnv->fromUChar32=0x0000; |
| 119 if(cnv->extraInfo != NULL){ |
| 120 ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; |
| 121 ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; |
| 122 ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; |
| 123 ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; |
| 124 } |
| 125 } |
| 126 } |
| 127 |
| 128 /**************************************HZ Encoding******************************
******************* |
| 129 * Rules for HZ encoding |
| 130 * |
| 131 * In ASCII mode, a byte is interpreted as an ASCII character, unless a |
| 132 * '~' is encountered. The character '~' is an escape character. By |
| 133 * convention, it must be immediately followed ONLY by '~', '{' or '\n' |
| 134 * (<LF>), with the following special meaning. |
| 135 |
| 136 * 1. The escape sequence '~~' is interpreted as a '~'. |
| 137 * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB. |
| 138 * 3. The escape sequence '~\n' is a line-continuation marker to be |
| 139 * consumed with no output produced. |
| 140 * In GB mode, characters are interpreted two bytes at a time as (pure) |
| 141 * GB codes until the escape-from-GB code '~}' is read. This code |
| 142 * switches the mode from GB back to ASCII. (Note that the escape- |
| 143 * from-GB code '~}' ($7E7D) is outside the defined GB range.) |
| 144 * |
| 145 * Source: RFC 1842 |
| 146 * |
| 147 * Note that the formal syntax in RFC 1842 is invalid. I assume that the |
| 148 * intended definition of single-byte-segment is as follows (pedberg): |
| 149 * single-byte-segment = single-byte-seq 1*single-byte-char |
| 150 */ |
| 151 |
| 152 |
| 153 static void |
| 154 UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, |
| 155 UErrorCode* err){ |
| 156 char tempBuf[2]; |
| 157 const char *mySource = ( char *) args->source; |
| 158 UChar *myTarget = args->target; |
| 159 const char *mySourceLimit = args->sourceLimit; |
| 160 UChar32 targetUniChar = 0x0000; |
| 161 int32_t mySourceChar = 0x0000; |
| 162 UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); |
| 163 tempBuf[0]=0; |
| 164 tempBuf[1]=0; |
| 165 |
| 166 /* Calling code already handles this situation. */ |
| 167 /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (my
SourceLimit < args->source)){ |
| 168 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 169 return; |
| 170 }*/ |
| 171 |
| 172 while(mySource< mySourceLimit){ |
| 173 |
| 174 if(myTarget < args->targetLimit){ |
| 175 |
| 176 mySourceChar= (unsigned char) *mySource++; |
| 177 |
| 178 if(args->converter->mode == UCNV_TILDE) { |
| 179 /* second byte after ~ */ |
| 180 args->converter->mode=0; |
| 181 switch(mySourceChar) { |
| 182 case 0x0A: |
| 183 /* no output for ~\n (line-continuation marker) */ |
| 184 continue; |
| 185 case UCNV_TILDE: |
| 186 if(args->offsets) { |
| 187 args->offsets[myTarget - args->target]=(int32_t)(mySourc
e - args->source - 2); |
| 188 } |
| 189 *(myTarget++)=(UChar)mySourceChar; |
| 190 myData->isEmptySegment = FALSE; |
| 191 continue; |
| 192 case UCNV_OPEN_BRACE: |
| 193 case UCNV_CLOSE_BRACE: |
| 194 myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); |
| 195 if (myData->isEmptySegment) { |
| 196 myData->isEmptySegment = FALSE; /* we are handling it, r
eset to avoid future spurious errors */ |
| 197 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
| 198 args->converter->toUCallbackReason = UCNV_IRREGULAR; |
| 199 args->converter->toUBytes[0] = UCNV_TILDE; |
| 200 args->converter->toUBytes[1] = mySourceChar; |
| 201 args->converter->toULength = 2; |
| 202 args->target = myTarget; |
| 203 args->source = mySource; |
| 204 return; |
| 205 } |
| 206 myData->isEmptySegment = TRUE; |
| 207 continue; |
| 208 default: |
| 209 /* if the first byte is equal to TILDE and the trail byte |
| 210 * is not a valid byte then it is an error condition |
| 211 */ |
| 212 /* |
| 213 * Ticket 5691: consistent illegal sequences: |
| 214 * - We include at least the first byte in the illegal seque
nce. |
| 215 * - If any of the non-initial bytes could be the start of a
character, |
| 216 * we stop the illegal sequence before the first one of th
ose. |
| 217 */ |
| 218 myData->isEmptySegment = FALSE; /* different error here, res
et this to avoid spurious future error */ |
| 219 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
| 220 args->converter->toUBytes[0] = UCNV_TILDE; |
| 221 if( myData->isStateDBCS ? |
| 222 (0x21 <= mySourceChar && mySourceChar <= 0x7e) : |
| 223 mySourceChar <= 0x7f |
| 224 ) { |
| 225 /* The current byte could be the start of a character: B
ack it out. */ |
| 226 args->converter->toULength = 1; |
| 227 --mySource; |
| 228 } else { |
| 229 /* Include the current byte in the illegal sequence. */ |
| 230 args->converter->toUBytes[1] = mySourceChar; |
| 231 args->converter->toULength = 2; |
| 232 } |
| 233 args->target = myTarget; |
| 234 args->source = mySource; |
| 235 return; |
| 236 } |
| 237 } else if(myData->isStateDBCS) { |
| 238 if(args->converter->toUnicodeStatus == 0x00){ |
| 239 /* lead byte */ |
| 240 if(mySourceChar == UCNV_TILDE) { |
| 241 args->converter->mode = UCNV_TILDE; |
| 242 } else { |
| 243 /* add another bit to distinguish a 0 byte from not havi
ng seen a lead byte */ |
| 244 args->converter->toUnicodeStatus = (uint32_t) (mySourceC
har | 0x100); |
| 245 myData->isEmptySegment = FALSE; /* the segment has somet
hing, either valid or will produce a different error, so reset this */ |
| 246 } |
| 247 continue; |
| 248 } |
| 249 else{ |
| 250 /* trail byte */ |
| 251 int leadIsOk, trailIsOk; |
| 252 uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; |
| 253 targetUniChar = 0xffff; |
| 254 /* |
| 255 * Ticket 5691: consistent illegal sequences: |
| 256 * - We include at least the first byte in the illegal seque
nce. |
| 257 * - If any of the non-initial bytes could be the start of a
character, |
| 258 * we stop the illegal sequence before the first one of th
ose. |
| 259 * |
| 260 * In HZ DBCS, if the second byte is in the 21..7e range, |
| 261 * we report only the first byte as the illegal sequence. |
| 262 * Otherwise we convert or report the pair of bytes. |
| 263 */ |
| 264 leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); |
| 265 trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); |
| 266 if (leadIsOk && trailIsOk) { |
| 267 tempBuf[0] = (char) (leadByte+0x80) ; |
| 268 tempBuf[1] = (char) (mySourceChar+0x80); |
| 269 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbCo
nverter->sharedData, |
| 270 tempBuf, 2, args->converter->useFallback); |
| 271 mySourceChar= (leadByte << 8) | mySourceChar; |
| 272 } else if (trailIsOk) { |
| 273 /* report a single illegal byte and continue with the fo
llowing DBCS starter byte */ |
| 274 --mySource; |
| 275 mySourceChar = (int32_t)leadByte; |
| 276 } else { |
| 277 /* report a pair of illegal bytes if the second byte is
not a DBCS starter */ |
| 278 /* add another bit so that the code below writes 2 bytes
in case of error */ |
| 279 mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; |
| 280 } |
| 281 args->converter->toUnicodeStatus =0x00; |
| 282 } |
| 283 } |
| 284 else{ |
| 285 if(mySourceChar == UCNV_TILDE) { |
| 286 args->converter->mode = UCNV_TILDE; |
| 287 continue; |
| 288 } else if(mySourceChar <= 0x7f) { |
| 289 targetUniChar = (UChar)mySourceChar; /* ASCII */ |
| 290 myData->isEmptySegment = FALSE; /* the segment has something
valid */ |
| 291 } else { |
| 292 targetUniChar = 0xffff; |
| 293 myData->isEmptySegment = FALSE; /* different error here, res
et this to avoid spurious future error */ |
| 294 } |
| 295 } |
| 296 if(targetUniChar < 0xfffe){ |
| 297 if(args->offsets) { |
| 298 args->offsets[myTarget - args->target]=(int32_t)(mySource -
args->source - 1-(myData->isStateDBCS)); |
| 299 } |
| 300 |
| 301 *(myTarget++)=(UChar)targetUniChar; |
| 302 } |
| 303 else /* targetUniChar>=0xfffe */ { |
| 304 if(targetUniChar == 0xfffe){ |
| 305 *err = U_INVALID_CHAR_FOUND; |
| 306 } |
| 307 else{ |
| 308 *err = U_ILLEGAL_CHAR_FOUND; |
| 309 } |
| 310 if(mySourceChar > 0xff){ |
| 311 args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); |
| 312 args->converter->toUBytes[1] = (uint8_t)mySourceChar; |
| 313 args->converter->toULength=2; |
| 314 } |
| 315 else{ |
| 316 args->converter->toUBytes[0] = (uint8_t)mySourceChar; |
| 317 args->converter->toULength=1; |
| 318 } |
| 319 break; |
| 320 } |
| 321 } |
| 322 else{ |
| 323 *err =U_BUFFER_OVERFLOW_ERROR; |
| 324 break; |
| 325 } |
| 326 } |
| 327 |
| 328 args->target = myTarget; |
| 329 args->source = mySource; |
| 330 } |
| 331 |
| 332 |
| 333 static void |
| 334 UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, |
| 335 UErrorCode * err){ |
| 336 const UChar *mySource = args->source; |
| 337 char *myTarget = args->target; |
| 338 int32_t* offsets = args->offsets; |
| 339 int32_t mySourceIndex = 0; |
| 340 int32_t myTargetIndex = 0; |
| 341 int32_t targetLength = (int32_t)(args->targetLimit - myTarget); |
| 342 int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source); |
| 343 int32_t length=0; |
| 344 uint32_t targetUniChar = 0x0000; |
| 345 UChar32 mySourceChar = 0x0000; |
| 346 UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraI
nfo; |
| 347 UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; |
| 348 UBool oldIsTargetUCharDBCS = isTargetUCharDBCS; |
| 349 int len =0; |
| 350 const char* escSeq=NULL; |
| 351 |
| 352 /* Calling code already handles this situation. */ |
| 353 /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->
sourceLimit < args->source)){ |
| 354 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 355 return; |
| 356 }*/ |
| 357 if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) { |
| 358 goto getTrail; |
| 359 } |
| 360 /*writing the char to the output stream */ |
| 361 while (mySourceIndex < mySourceLength){ |
| 362 targetUniChar = missingCharMarker; |
| 363 if (myTargetIndex < targetLength){ |
| 364 |
| 365 mySourceChar = (UChar) mySource[mySourceIndex++]; |
| 366 |
| 367 |
| 368 oldIsTargetUCharDBCS = isTargetUCharDBCS; |
| 369 if(mySourceChar ==UCNV_TILDE){ |
| 370 /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",er
r,2,&mySourceIndex);*/ |
| 371 len = ESC_LEN; |
| 372 escSeq = TILDE_ESCAPE; |
| 373 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,er
r,len,mySourceIndex); |
| 374 continue; |
| 375 } else if(mySourceChar <= 0x7f) { |
| 376 length = 1; |
| 377 targetUniChar = mySourceChar; |
| 378 } else { |
| 379 length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->share
dData, |
| 380 mySourceChar,&targetUniChar,args->converter->useFallback); |
| 381 /* we can only use lead bytes 21..7D and trail bytes 21..7E */ |
| 382 if( length == 2 && |
| 383 (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && |
| 384 (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) |
| 385 ) { |
| 386 targetUniChar -= 0x8080; |
| 387 } else { |
| 388 targetUniChar = missingCharMarker; |
| 389 } |
| 390 } |
| 391 if (targetUniChar != missingCharMarker){ |
| 392 myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(
targetUniChar>0x00FF); |
| 393 if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterDat
a->isEscapeAppended ){ |
| 394 /*Shifting from a double byte to single byte mode*/ |
| 395 if(!isTargetUCharDBCS){ |
| 396 len =ESC_LEN; |
| 397 escSeq = SB_ESCAPE; |
| 398 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, e
scSeq,err,len,mySourceIndex); |
| 399 myConverterData->isEscapeAppended = TRUE; |
| 400 } |
| 401 else{ /* Shifting from a single byte to double byte mode*/ |
| 402 len =ESC_LEN; |
| 403 escSeq = DB_ESCAPE; |
| 404 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, e
scSeq,err,len,mySourceIndex); |
| 405 myConverterData->isEscapeAppended = TRUE; |
| 406 |
| 407 } |
| 408 } |
| 409 |
| 410 if(isTargetUCharDBCS){ |
| 411 if( myTargetIndex <targetLength){ |
| 412 myTarget[myTargetIndex++] =(char) (targetUniChar >> 8); |
| 413 if(offsets){ |
| 414 *(offsets++) = mySourceIndex-1; |
| 415 } |
| 416 if(myTargetIndex < targetLength){ |
| 417 myTarget[myTargetIndex++] =(char) targetUniChar; |
| 418 if(offsets){ |
| 419 *(offsets++) = mySourceIndex-1; |
| 420 } |
| 421 }else{ |
| 422 args->converter->charErrorBuffer[args->converter->ch
arErrorBufferLength++] = (char) targetUniChar; |
| 423 *err = U_BUFFER_OVERFLOW_ERROR; |
| 424 } |
| 425 }else{ |
| 426 args->converter->charErrorBuffer[args->converter->charEr
rorBufferLength++] =(char) (targetUniChar >> 8); |
| 427 args->converter->charErrorBuffer[args->converter->charEr
rorBufferLength++] = (char) targetUniChar; |
| 428 *err = U_BUFFER_OVERFLOW_ERROR; |
| 429 } |
| 430 |
| 431 }else{ |
| 432 if( myTargetIndex <targetLength){ |
| 433 myTarget[myTargetIndex++] = (char) (targetUniChar ); |
| 434 if(offsets){ |
| 435 *(offsets++) = mySourceIndex-1; |
| 436 } |
| 437 |
| 438 }else{ |
| 439 args->converter->charErrorBuffer[args->converter->charEr
rorBufferLength++] = (char) targetUniChar; |
| 440 *err = U_BUFFER_OVERFLOW_ERROR; |
| 441 } |
| 442 } |
| 443 |
| 444 } |
| 445 else{ |
| 446 /* oops.. the code point is unassigned */ |
| 447 /*Handle surrogates */ |
| 448 /*check if the char is a First surrogate*/ |
| 449 if(UTF_IS_SURROGATE(mySourceChar)) { |
| 450 if(UTF_IS_SURROGATE_FIRST(mySourceChar)) { |
| 451 args->converter->fromUChar32=mySourceChar; |
| 452 getTrail: |
| 453 /*look ahead to find the trail surrogate*/ |
| 454 if(mySourceIndex < mySourceLength) { |
| 455 /* test the following code unit */ |
| 456 UChar trail=(UChar) args->source[mySourceIndex]; |
| 457 if(UTF_IS_SECOND_SURROGATE(trail)) { |
| 458 ++mySourceIndex; |
| 459 mySourceChar=UTF16_GET_PAIR_VALUE(args->converte
r->fromUChar32, trail); |
| 460 args->converter->fromUChar32=0x00; |
| 461 /* there are no surrogates in GB2312*/ |
| 462 *err = U_INVALID_CHAR_FOUND; |
| 463 /* exit this condition tree */ |
| 464 } else { |
| 465 /* this is an unmatched lead code unit (1st surr
ogate) */ |
| 466 /* callback(illegal) */ |
| 467 *err=U_ILLEGAL_CHAR_FOUND; |
| 468 } |
| 469 } else { |
| 470 /* no more input */ |
| 471 *err = U_ZERO_ERROR; |
| 472 } |
| 473 } else { |
| 474 /* this is an unmatched trail code unit (2nd surrogate)
*/ |
| 475 /* callback(illegal) */ |
| 476 *err=U_ILLEGAL_CHAR_FOUND; |
| 477 } |
| 478 } else { |
| 479 /* callback(unassigned) for a BMP code point */ |
| 480 *err = U_INVALID_CHAR_FOUND; |
| 481 } |
| 482 |
| 483 args->converter->fromUChar32=mySourceChar; |
| 484 break; |
| 485 } |
| 486 } |
| 487 else{ |
| 488 *err = U_BUFFER_OVERFLOW_ERROR; |
| 489 break; |
| 490 } |
| 491 targetUniChar=missingCharMarker; |
| 492 } |
| 493 |
| 494 args->target += myTargetIndex; |
| 495 args->source += mySourceIndex; |
| 496 myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; |
| 497 } |
| 498 |
| 499 static void |
| 500 _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *e
rr) { |
| 501 UConverter *cnv = args->converter; |
| 502 UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo; |
| 503 char *p; |
| 504 char buffer[4]; |
| 505 p = buffer; |
| 506 |
| 507 if( convData->isTargetUCharDBCS){ |
| 508 *p++= UCNV_TILDE; |
| 509 *p++= UCNV_CLOSE_BRACE; |
| 510 convData->isTargetUCharDBCS=FALSE; |
| 511 } |
| 512 *p++= (char)cnv->subChars[0]; |
| 513 |
| 514 ucnv_cbFromUWriteBytes(args, |
| 515 buffer, (int32_t)(p - buffer), |
| 516 offsetIndex, err); |
| 517 } |
| 518 |
| 519 /* |
| 520 * Structure for cloning an HZ converter into a single memory block. |
| 521 * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct, |
| 522 * and then ucnv_safeClone() of the sub-converter may additionally align |
| 523 * subCnv inside the cloneHZStruct, for which we need the deadSpace after |
| 524 * subCnv. This is because UAlignedMemory may be larger than the actually |
| 525 * necessary alignment size for the platform. |
| 526 * The other cloneHZStruct fields will not be moved around, |
| 527 * and are aligned properly with cloneHZStruct's alignment. |
| 528 */ |
/* Memory layout of a safe-cloned HZ converter; _HZ_SafeClone casts the caller's buffer to this. */
struct cloneHZStruct
{
    UConverter cnv;            /* the cloned HZ converter itself; its address is what _HZ_SafeClone returns */
    UConverter subCnv;         /* storage handed to ucnv_safeClone() for the GBK sub-converter clone */
    UAlignedMemory deadSpace;  /* padding counted into the size offered for the sub-converter clone */
    UConverterDataHZ mydata;   /* copy of the HZ state; the clone's extraInfo points here */
};
| 536 |
| 537 |
| 538 static UConverter * |
| 539 _HZ_SafeClone(const UConverter *cnv, |
| 540 void *stackBuffer, |
| 541 int32_t *pBufferSize, |
| 542 UErrorCode *status) |
| 543 { |
| 544 struct cloneHZStruct * localClone; |
| 545 int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); |
| 546 |
| 547 if (U_FAILURE(*status)){ |
| 548 return 0; |
| 549 } |
| 550 |
| 551 if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pB
ufferSize */ |
| 552 *pBufferSize = bufferSizeNeeded; |
| 553 return 0; |
| 554 } |
| 555 |
| 556 localClone = (struct cloneHZStruct *)stackBuffer; |
| 557 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ |
| 558 |
| 559 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ)); |
| 560 localClone->cnv.extraInfo = &localClone->mydata; |
| 561 localClone->cnv.isExtraLocal = TRUE; |
| 562 |
| 563 /* deep-clone the sub-converter */ |
| 564 size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include si
ze of padding */ |
| 565 ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = |
| 566 ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localC
lone->subCnv, &size, status); |
| 567 |
| 568 return &localClone->cnv; |
| 569 } |
| 570 |
| 571 static void |
| 572 _HZ_GetUnicodeSet(const UConverter *cnv, |
| 573 const USetAdder *sa, |
| 574 UConverterUnicodeSet which, |
| 575 UErrorCode *pErrorCode) { |
| 576 /* HZ converts all of ASCII */ |
| 577 sa->addRange(sa->set, 0, 0x7f); |
| 578 |
| 579 /* add all of the code points that the sub-converter handles */ |
| 580 ucnv_MBCSGetFilteredUnicodeSetForUnicode( |
| 581 ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, |
| 582 sa, which, UCNV_SET_FILTER_HZ, |
| 583 pErrorCode); |
| 584 } |
| 585 |
/*
 * Function table for the HZ converter, referenced from _HZData.
 * NOTE(review): the meaning of each positional slot comes from the
 * UConverterImpl declaration, which is not visible in this file - confirm
 * against ucnv_cnv.h before reordering anything.
 */
static const UConverterImpl _HZImpl={

    UCNV_HZ,

    NULL,
    NULL,

    _HZOpen,
    _HZClose,
    _HZReset,

    /* the same offsets-aware routine fills both consecutive to-Unicode slots, and likewise for from-Unicode */
    UConverter_toUnicode_HZ_OFFSETS_LOGIC,
    UConverter_toUnicode_HZ_OFFSETS_LOGIC,
    UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
    UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
    NULL,

    NULL,
    NULL,
    _HZ_WriteSub,
    _HZ_SafeClone,
    _HZ_GetUnicodeSet
};
| 609 |
/*
 * Static description of the HZ converter, referenced from _HZData.
 * NOTE(review): field meanings follow the UConverterStaticData declaration,
 * which is not visible in this file; the inline notes below are assumptions
 * to confirm against that declaration.
 */
static const UConverterStaticData _HZStaticData={
    sizeof(UConverterStaticData),
    "HZ", /* converter name */
    0,
    UCNV_IBM,
    UCNV_HZ,
    1, /* presumably the minimum bytes per character - TODO confirm */
    4, /* presumably the maximum bytes per character (escape + two bytes) - TODO confirm */
    { 0x1a, 0, 0, 0 }, /* presumably the substitution bytes; _HZ_WriteSub writes cnv->subChars[0] - TODO confirm */
    1,
    FALSE,
    FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

};
| 627 |
| 628 |
/*
 * Shared-data record for the HZ converter; ties together the static
 * description (_HZStaticData) and the function table (_HZImpl). This is the
 * only non-static definition in this file.
 * NOTE(review): the remaining positional fields are defined by
 * UConverterSharedData, which is not visible here - confirm their meaning there.
 */
const UConverterSharedData _HZData={
    sizeof(UConverterSharedData),
    ~((uint32_t) 0),
    NULL,
    NULL,
    &_HZStaticData,
    FALSE,
    &_HZImpl,
    0
};
| 639 |
| 640 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
OLD | NEW |