| OLD | NEW |
| 1 /* | 1 /* |
| 2 ********************************************************************** | 2 ********************************************************************** |
| 3 * Copyright (C) 2000-2006, International Business Machines | 3 * Copyright (C) 2000-2007, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** | 5 ********************************************************************** |
| 6 * file name: ucnvhz.c | 6 * file name: ucnvhz.c |
| 7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
| 9 * indentation:4 | 9 * indentation:4 |
| 10 * | 10 * |
| 11 * created on: 2000oct16 | 11 * created on: 2000oct16 |
| 12 * created by: Ram Viswanadha | 12 * created by: Ram Viswanadha |
| 13 * 10/31/2000 Ram Implemented offsets logic function | 13 * 10/31/2000 Ram Implemented offsets logic function |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 52 } | 52 } |
| 53 | 53 |
| 54 | 54 |
| 55 typedef struct{ | 55 typedef struct{ |
| 56 UConverter* gbConverter; | 56 UConverter* gbConverter; |
| 57 int32_t targetIndex; | 57 int32_t targetIndex; |
| 58 int32_t sourceIndex; | 58 int32_t sourceIndex; |
| 59 UBool isEscapeAppended; | 59 UBool isEscapeAppended; |
| 60 UBool isStateDBCS; | 60 UBool isStateDBCS; |
| 61 UBool isTargetUCharDBCS; | 61 UBool isTargetUCharDBCS; |
| 62 UBool isEmptySegment; |
| 62 }UConverterDataHZ; | 63 }UConverterDataHZ; |
| 63 | 64 |
| 64 | 65 |
| 65 | 66 |
| 66 static void | 67 static void |
| 67 _HZOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, UErrorCode *errorCode){ | 68 _HZOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, UErrorCode *errorCode){ |
| 68 cnv->toUnicodeStatus = 0; | 69 cnv->toUnicodeStatus = 0; |
| 69 cnv->fromUnicodeStatus= 0; | 70 cnv->fromUnicodeStatus= 0; |
| 70 cnv->mode=0; | 71 cnv->mode=0; |
| 71 cnv->fromUChar32=0x0000; | 72 cnv->fromUChar32=0x0000; |
| 72 cnv->extraInfo = uprv_malloc(sizeof(UConverterDataHZ)); | 73 cnv->extraInfo = uprv_malloc(sizeof(UConverterDataHZ)); |
| 73 if(cnv->extraInfo != NULL){ | 74 if(cnv->extraInfo != NULL){ |
| 74 uprv_memset(cnv->extraInfo, 0, sizeof(UConverterDataHZ)); | 75 uprv_memset(cnv->extraInfo, 0, sizeof(UConverterDataHZ)); |
| 75 ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode); | 76 ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("GBK",errorCode); |
| 76 } | 77 } |
| 77 else { | 78 else { |
| 78 *errorCode = U_MEMORY_ALLOCATION_ERROR; | 79 *errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 79 return; | 80 return; |
| 80 } | 81 } |
| 81 } | 82 } |
| 82 | 83 |
| 83 static void | 84 static void |
| 84 _HZClose(UConverter *cnv){ | 85 _HZClose(UConverter *cnv){ |
| 85 if(cnv->extraInfo != NULL) { | 86 if(cnv->extraInfo != NULL) { |
| 86 ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); | 87 ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); |
| 87 if(!cnv->isExtraLocal) { | 88 if(!cnv->isExtraLocal) { |
| 88 uprv_free(cnv->extraInfo); | 89 uprv_free(cnv->extraInfo); |
| 89 } | 90 } |
| 90 cnv->extraInfo = NULL; | 91 cnv->extraInfo = NULL; |
| 91 } | 92 } |
| 92 } | 93 } |
| 93 | 94 |
| 94 static void | 95 static void |
| 95 _HZReset(UConverter *cnv, UConverterResetChoice choice){ | 96 _HZReset(UConverter *cnv, UConverterResetChoice choice){ |
| 96 if(choice<=UCNV_RESET_TO_UNICODE) { | 97 if(choice<=UCNV_RESET_TO_UNICODE) { |
| 97 cnv->toUnicodeStatus = 0; | 98 cnv->toUnicodeStatus = 0; |
| 98 cnv->mode=0; | 99 cnv->mode=0; |
| 99 if(cnv->extraInfo != NULL){ | 100 if(cnv->extraInfo != NULL){ |
| 100 ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; | 101 ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; |
| 102 ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; |
| 101 } | 103 } |
| 102 } | 104 } |
| 103 if(choice!=UCNV_RESET_TO_UNICODE) { | 105 if(choice!=UCNV_RESET_TO_UNICODE) { |
| 104 cnv->fromUnicodeStatus= 0; | 106 cnv->fromUnicodeStatus= 0; |
| 105 cnv->fromUChar32=0x0000; | 107 cnv->fromUChar32=0x0000; |
| 106 if(cnv->extraInfo != NULL){ | 108 if(cnv->extraInfo != NULL){ |
| 107 ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; | 109 ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; |
| 108 ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; | 110 ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; |
| 109 ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; | 111 ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; |
| 110 ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; | 112 ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; |
| (...skipping 12 matching lines...) Expand all Loading... |
| 123 * 1. The escape sequence '~~' is interpreted as a '~'. | 125 * 1. The escape sequence '~~' is interpreted as a '~'. |
| 124 * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB. | 126 * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB. |
| 125 * 3. The escape sequence '~\n' is a line-continuation marker to be | 127 * 3. The escape sequence '~\n' is a line-continuation marker to be |
| 126 * consumed with no output produced. | 128 * consumed with no output produced. |
| 127 * In GB mode, characters are interpreted two bytes at a time as (pure) | 129 * In GB mode, characters are interpreted two bytes at a time as (pure) |
| 128 * GB codes until the escape-from-GB code '~}' is read. This code | 130 * GB codes until the escape-from-GB code '~}' is read. This code |
| 129 * switches the mode from GB back to ASCII. (Note that the escape- | 131 * switches the mode from GB back to ASCII. (Note that the escape- |
| 130 * from-GB code '~}' ($7E7D) is outside the defined GB range.) | 132 * from-GB code '~}' ($7E7D) is outside the defined GB range.) |
| 131 * | 133 * |
| 132 * Source: RFC 1842 | 134 * Source: RFC 1842 |
| 135 * |
| 136 * Note that the formal syntax in RFC 1842 is invalid. I assume that the |
| 137 * intended definition of single-byte-segment is as follows (pedberg): |
| 138 * single-byte-segment = single-byte-seq 1*single-byte-char |
| 133 */ | 139 */ |
| 134 | 140 |
| 135 | 141 |
| 136 static void | 142 static void |
| 137 UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, | 143 UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, |
| 138 UErrorCode* err){ | 144 UErrorCode* err){ |
| 139 char tempBuf[2]; | 145 char tempBuf[2]; |
| 140 const char *mySource = ( char *) args->source; | 146 const char *mySource = ( char *) args->source; |
| 141 UChar *myTarget = args->target; | 147 UChar *myTarget = args->target; |
| 142 const char *mySourceLimit = args->sourceLimit; | 148 const char *mySourceLimit = args->sourceLimit; |
| 143 UChar32 targetUniChar = 0x0000; | 149 UChar32 targetUniChar = 0x0000; |
| 144 UChar mySourceChar = 0x0000; | 150 int32_t mySourceChar = 0x0000; |
| 145 UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); | 151 UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); |
| 146 tempBuf[0]=0; | 152 tempBuf[0]=0; |
| 147 tempBuf[1]=0; | 153 tempBuf[1]=0; |
| 148 if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ | 154 if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ |
| 149 *err = U_ILLEGAL_ARGUMENT_ERROR; | 155 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 150 return; | 156 return; |
| 151 } | 157 } |
| 152 | 158 |
| 153 while(mySource< mySourceLimit){ | 159 while(mySource< mySourceLimit){ |
| 154 | 160 |
| 155 if(myTarget < args->targetLimit){ | 161 if(myTarget < args->targetLimit){ |
| 156 | 162 |
| 157 mySourceChar= (unsigned char) *mySource++; | 163 mySourceChar= (unsigned char) *mySource++; |
| 158 | 164 |
| 159 switch(mySourceChar){ | 165 if(args->converter->mode == UCNV_TILDE) { |
| 166 /* second byte after ~ */ |
| 167 args->converter->mode=0; |
| 168 switch(mySourceChar) { |
| 160 case 0x0A: | 169 case 0x0A: |
| 161 if(args->converter->mode ==UCNV_TILDE){ | 170 /* no output for ~\n (line-continuation marker) */ |
| 162 args->converter->mode=0; | 171 continue; |
| 163 | 172 case UCNV_TILDE: |
| 173 if(args->offsets) { |
| 174 args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); |
| 164 } | 175 } |
| 165 *(myTarget++)=(UChar)mySourceChar; | 176 *(myTarget++)=(UChar)mySourceChar; |
| 177 myData->isEmptySegment = FALSE; |
| 166 continue; | 178 continue; |
| 167 | 179 case UCNV_OPEN_BRACE: |
| 168 case UCNV_TILDE: | 180 case UCNV_CLOSE_BRACE: |
| 169 if(args->converter->mode ==UCNV_TILDE){ | 181 myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); |
| 170 *(myTarget++)=(UChar)mySourceChar; | 182 if (myData->isEmptySegment) { |
| 171 args->converter->mode=0; | 183 myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ |
| 172 continue; | 184 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
| 173 | 185 args->converter->toUCallbackReason = UCNV_IRREGULAR; |
| 186 args->converter->toUBytes[0] = UCNV_TILDE; |
| 187 args->converter->toUBytes[1] = mySourceChar; |
| 188 args->converter->toULength = 2; |
| 189 args->target = myTarget; |
| 190 args->source = mySource; |
| 191 return; |
| 174 } | 192 } |
| 175 else if(args->converter->toUnicodeStatus !=0){ | 193 myData->isEmptySegment = TRUE; |
| 176 args->converter->mode=0; | 194 continue; |
| 177 break; | |
| 178 } | |
| 179 else{ | |
| 180 args->converter->mode = UCNV_TILDE; | |
| 181 continue; | |
| 182 } | |
| 183 | |
| 184 | |
| 185 case UCNV_OPEN_BRACE: | |
| 186 if(args->converter->mode == UCNV_TILDE){ | |
| 187 args->converter->mode=0; | |
| 188 myData->isStateDBCS = TRUE; | |
| 189 continue; | |
| 190 } | |
| 191 else{ | |
| 192 break; | |
| 193 } | |
| 194 | |
| 195 | |
| 196 case UCNV_CLOSE_BRACE: | |
| 197 if(args->converter->mode == UCNV_TILDE){ | |
| 198 args->converter->mode=0; | |
| 199 myData->isStateDBCS = FALSE; | |
| 200 continue; | |
| 201 } | |
| 202 else{ | |
| 203 break; | |
| 204 } | |
| 205 | |
| 206 default: | 195 default: |
| 207 /* if the first byte is equal to TILDE and the trail byte | 196 /* if the first byte is equal to TILDE and the trail byte |
| 208 * is not a valid byte then it is an error condition | 197 * is not a valid byte then it is an error condition |
| 209 */ | 198 */ |
| 210 if(args->converter->mode == UCNV_TILDE){ | 199 /* |
| 211 args->converter->mode=0; | 200 * Ticket 5691: consistent illegal sequences: |
| 212 mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); | 201 * - We include at least the first byte in the illegal sequence. |
| 213 goto SAVE_STATE; | 202 * - If any of the non-initial bytes could be the start of a character, |
| 203 * we stop the illegal sequence before the first one of those. |
| 204 */ |
| 205 myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ |
| 206 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
| 207 args->converter->toUBytes[0] = UCNV_TILDE; |
| 208 if( myData->isStateDBCS ? |
| 209 (0x21 <= mySourceChar && mySourceChar <= 0x7e) : |
| 210 mySourceChar <= 0x7f |
| 211 ) { |
| 212 /* The current byte could be the start of a character: Back it out. */ |
| 213 args->converter->toULength = 1; |
| 214 --mySource; |
| 215 } else { |
| 216 /* Include the current byte in the illegal sequence. */ |
| 217 args->converter->toUBytes[1] = mySourceChar; |
| 218 args->converter->toULength = 2; |
| 214 } | 219 } |
| 215 | 220 args->target = myTarget; |
| 216 break; | 221 args->source = mySource; |
| 217 | 222 return; |
| 218 } | 223 } |
| 219 | 224 } else if(myData->isStateDBCS) { |
| 220 if(myData->isStateDBCS){ | |
| 221 if(args->converter->toUnicodeStatus == 0x00){ | 225 if(args->converter->toUnicodeStatus == 0x00){ |
| 222 args->converter->toUnicodeStatus = (UChar) mySourceChar; | 226 /* lead byte */ |
| 227 if(mySourceChar == UCNV_TILDE) { |
| 228 args->converter->mode = UCNV_TILDE; |
| 229 } else { |
| 230 /* add another bit to distinguish a 0 byte from not having seen a lead byte */ |
| 231 args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); |
| 232 myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ |
| 233 } |
| 223 continue; | 234 continue; |
| 224 } | 235 } |
| 225 else{ | 236 else{ |
| 226 tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80)
; | 237 /* trail byte */ |
| 227 tempBuf[1] = (char) (mySourceChar+0x80); | 238 int leadIsOk, trailIsOk; |
| 228 mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); | 239 uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; |
| 240 targetUniChar = 0xffff; |
| 241 /* |
| 242 * Ticket 5691: consistent illegal sequences: |
| 243 * - We include at least the first byte in the illegal sequence. |
| 244 * - If any of the non-initial bytes could be the start of a character, |
| 245 * we stop the illegal sequence before the first one of those. |
| 246 * |
| 247 * In HZ DBCS, if the second byte is in the 21..7e range, |
| 248 * we report only the first byte as the illegal sequence. |
| 249 * Otherwise we convert or report the pair of bytes. |
| 250 */ |
| 251 leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); |
| 252 trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); |
| 253 if (leadIsOk && trailIsOk) { |
| 254 tempBuf[0] = (char) (leadByte+0x80) ; |
| 255 tempBuf[1] = (char) (mySourceChar+0x80); |
| 256 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, |
| 257 tempBuf, 2, args->converter->useFallback); |
| 258 mySourceChar= (leadByte << 8) | mySourceChar; |
| 259 } else if (trailIsOk) { |
| 260 /* report a single illegal byte and continue with the following DBCS starter byte */ |
| 261 --mySource; |
| 262 mySourceChar = (int32_t)leadByte; |
| 263 } else { |
| 264 /* report a pair of illegal bytes if the second byte is
not a DBCS starter */ |
| 265 /* add another bit so that the code below writes 2 bytes
in case of error */ |
| 266 mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; |
| 267 } |
| 229 args->converter->toUnicodeStatus =0x00; | 268 args->converter->toUnicodeStatus =0x00; |
| 230 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, | |
| 231 tempBuf, 2, args->converter->useFallback); | |
| 232 } | 269 } |
| 233 } | 270 } |
| 234 else{ | 271 else{ |
| 235 if(args->converter->fromUnicodeStatus == 0x00){ | 272 if(mySourceChar == UCNV_TILDE) { |
| 236 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, | 273 args->converter->mode = UCNV_TILDE; |
| 237 mySource - 1, 1, args->converter->useFallback); | 274 continue; |
| 275 } else if(mySourceChar <= 0x7f) { |
| 276 targetUniChar = (UChar)mySourceChar; /* ASCII */ |
| 277 myData->isEmptySegment = FALSE; /* the segment has something
valid */ |
| 278 } else { |
| 279 targetUniChar = 0xffff; |
| 280 myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ |
| 238 } | 281 } |
| 239 else{ | |
| 240 goto SAVE_STATE; | |
| 241 } | |
| 242 | |
| 243 } | 282 } |
| 244 if(targetUniChar < 0xfffe){ | 283 if(targetUniChar < 0xfffe){ |
| 245 if(args->offsets) { | 284 if(args->offsets) { |
| 246 args->offsets[myTarget - args->target]=(int32_t)(mySource -
args->source - 1-(myData->isStateDBCS)); | 285 args->offsets[myTarget - args->target]=(int32_t)(mySource -
args->source - 1-(myData->isStateDBCS)); |
| 247 } | 286 } |
| 248 | 287 |
| 249 *(myTarget++)=(UChar)targetUniChar; | 288 *(myTarget++)=(UChar)targetUniChar; |
| 250 } | 289 } |
| 251 else if(targetUniChar>=0xfffe){ | 290 else /* targetUniChar>=0xfffe */ { |
| 252 SAVE_STATE: | |
| 253 if(targetUniChar == 0xfffe){ | 291 if(targetUniChar == 0xfffe){ |
| 254 *err = U_INVALID_CHAR_FOUND; | 292 *err = U_INVALID_CHAR_FOUND; |
| 255 } | 293 } |
| 256 else{ | 294 else{ |
| 257 *err = U_ILLEGAL_CHAR_FOUND; | 295 *err = U_ILLEGAL_CHAR_FOUND; |
| 258 } | 296 } |
| 259 if(myData->isStateDBCS){ | 297 if(mySourceChar > 0xff){ |
| 260 /* this should never occur since isStateDBCS is set to true | 298 args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); |
| 261 * only after tempBuf[0] and tempBuf[1] | 299 args->converter->toUBytes[1] = (uint8_t)mySourceChar; |
| 262 * are set to the input .. just to please BEAM | 300 args->converter->toULength=2; |
| 263 */ | |
| 264 if(tempBuf[0]==0 || tempBuf[1]==0){ | |
| 265 *err = U_INTERNAL_PROGRAM_ERROR; | |
| 266 }else{ | |
| 267 args->converter->toUBytes[0] = (uint8_t)(tempBuf[0]-0x80); | |
| 268 args->converter->toUBytes[1] = (uint8_t)(tempBuf[1]-0x80); | |
| 269 args->converter->toULength=2; | |
| 270 } | |
| 271 } | 301 } |
| 272 else{ | 302 else{ |
| 273 args->converter->toUBytes[0] = (uint8_t)mySourceChar; | 303 args->converter->toUBytes[0] = (uint8_t)mySourceChar; |
| 274 args->converter->toULength=1; | 304 args->converter->toULength=1; |
| 275 } | 305 } |
| 276 break; | 306 break; |
| 277 } | 307 } |
| 278 } | 308 } |
| 279 else{ | 309 else{ |
| 280 *err =U_BUFFER_OVERFLOW_ERROR; | 310 *err =U_BUFFER_OVERFLOW_ERROR; |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 321 mySourceChar = (UChar) mySource[mySourceIndex++]; | 351 mySourceChar = (UChar) mySource[mySourceIndex++]; |
| 322 | 352 |
| 323 | 353 |
| 324 oldIsTargetUCharDBCS = isTargetUCharDBCS; | 354 oldIsTargetUCharDBCS = isTargetUCharDBCS; |
| 325 if(mySourceChar ==UCNV_TILDE){ | 355 if(mySourceChar ==UCNV_TILDE){ |
| 326 /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/ | 356 /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/ |
| 327 len = ESC_LEN; | 357 len = ESC_LEN; |
| 328 escSeq = TILDE_ESCAPE; | 358 escSeq = TILDE_ESCAPE; |
| 329 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); | 359 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); |
| 330 continue; | 360 continue; |
| 331 } | 361 } else if(mySourceChar <= 0x7f) { |
| 332 else{ | 362 length = 1; |
| 363 targetUniChar = mySourceChar; |
| 364 } else { |
| 333 length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, | 365 length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, |
| 334 mySourceChar,&targetUniChar,args->converter->useFallback); | 366 mySourceChar,&targetUniChar,args->converter->useFallback); |
| 335 | 367 /* we can only use lead bytes 21..7D and trail bytes 21..7E */ |
| 336 } | 368 if( length == 2 && |
| 337 /* only DBCS or SBCS characters are expected*/ | 369 (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && |
| 338 /* DB haracters with high bit set to 1 are expected */ | 370 (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) |
| 339 if(length > 2 || length==0 ||(((targetUniChar & 0x8080) != 0x8080)&&
length==2)){ | 371 ) { |
| 340 targetUniChar= missingCharMarker; | 372 targetUniChar -= 0x8080; |
| 373 } else { |
| 374 targetUniChar = missingCharMarker; |
| 375 } |
| 341 } | 376 } |
| 342 if (targetUniChar != missingCharMarker){ | 377 if (targetUniChar != missingCharMarker){ |
| 343 myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); | 378 myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); |
| 344 if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ | 379 if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ |
| 345 /*Shifting from a double byte to single byte mode*/ | 380 /*Shifting from a double byte to single byte mode*/ |
| 346 if(!isTargetUCharDBCS){ | 381 if(!isTargetUCharDBCS){ |
| 347 len =ESC_LEN; | 382 len =ESC_LEN; |
| 348 escSeq = SB_ESCAPE; | 383 escSeq = SB_ESCAPE; |
| 349 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); | 384 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); |
| 350 myConverterData->isEscapeAppended = TRUE; | 385 myConverterData->isEscapeAppended = TRUE; |
| 351 } | 386 } |
| 352 else{ /* Shifting from a single byte to double byte mode*/ | 387 else{ /* Shifting from a single byte to double byte mode*/ |
| 353 len =ESC_LEN; | 388 len =ESC_LEN; |
| 354 escSeq = DB_ESCAPE; | 389 escSeq = DB_ESCAPE; |
| 355 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); | 390 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); |
| 356 myConverterData->isEscapeAppended = TRUE; | 391 myConverterData->isEscapeAppended = TRUE; |
| 357 | 392 |
| 358 } | 393 } |
| 359 } | 394 } |
| 360 | 395 |
| 361 if(isTargetUCharDBCS){ | 396 if(isTargetUCharDBCS){ |
| 362 if( myTargetIndex <targetLength){ | 397 if( myTargetIndex <targetLength){ |
| 363 myTarget[myTargetIndex++] =(char) ((targetUniChar >> 8)
-0x80); | 398 myTarget[myTargetIndex++] =(char) (targetUniChar >> 8); |
| 364 if(offsets){ | 399 if(offsets){ |
| 365 *(offsets++) = mySourceIndex-1; | 400 *(offsets++) = mySourceIndex-1; |
| 366 } | 401 } |
| 367 if(myTargetIndex < targetLength){ | 402 if(myTargetIndex < targetLength){ |
| 368 myTarget[myTargetIndex++] =(char) ((targetUniChar &
0x00FF) -0x80); | 403 myTarget[myTargetIndex++] =(char) targetUniChar; |
| 369 if(offsets){ | 404 if(offsets){ |
| 370 *(offsets++) = mySourceIndex-1; | 405 *(offsets++) = mySourceIndex-1; |
| 371 } | 406 } |
| 372 }else{ | 407 }else{ |
| 373 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80); | 408 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; |
| 374 *err = U_BUFFER_OVERFLOW_ERROR; | 409 *err = U_BUFFER_OVERFLOW_ERROR; |
| 375 } | 410 } |
| 376 }else{ | 411 }else{ |
| 377 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) ((targetUniChar >> 8) -0x80); | 412 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); |
| 378 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80); | 413 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; |
| 379 *err = U_BUFFER_OVERFLOW_ERROR; | 414 *err = U_BUFFER_OVERFLOW_ERROR; |
| 380 } | 415 } |
| 381 | 416 |
| 382 }else{ | 417 }else{ |
| 383 if( myTargetIndex <targetLength){ | 418 if( myTargetIndex <targetLength){ |
| 384 myTarget[myTargetIndex++] = (char) (targetUniChar ); | 419 myTarget[myTargetIndex++] = (char) (targetUniChar ); |
| 385 if(offsets){ | 420 if(offsets){ |
| 386 *(offsets++) = mySourceIndex-1; | 421 *(offsets++) = mySourceIndex-1; |
| 387 } | 422 } |
| 388 | 423 |
| (...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 517 ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); | 552 ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); |
| 518 | 553 |
| 519 return &localClone->cnv; | 554 return &localClone->cnv; |
| 520 } | 555 } |
| 521 | 556 |
| 522 static void | 557 static void |
| 523 _HZ_GetUnicodeSet(const UConverter *cnv, | 558 _HZ_GetUnicodeSet(const UConverter *cnv, |
| 524 const USetAdder *sa, | 559 const USetAdder *sa, |
| 525 UConverterUnicodeSet which, | 560 UConverterUnicodeSet which, |
| 526 UErrorCode *pErrorCode) { | 561 UErrorCode *pErrorCode) { |
| 527 /* the tilde '~' is hardcoded in the converter */ | 562 /* HZ converts all of ASCII */ |
| 528 sa->add(sa->set, 0x7e); | 563 sa->addRange(sa->set, 0, 0x7f); |
| 529 | 564 |
| 530 /* add all of the code points that the sub-converter handles */ | 565 /* add all of the code points that the sub-converter handles */ |
| 531 ((UConverterDataHZ*)cnv->extraInfo)-> | 566 ucnv_MBCSGetFilteredUnicodeSetForUnicode( |
| 532 gbConverter->sharedData->impl-> | 567 ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, |
| 533 getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, | 568 sa, which, UCNV_SET_FILTER_HZ, |
| 534 sa, which, pErrorCode); | 569 pErrorCode); |
| 535 } | 570 } |
| 536 | 571 |
| 537 static const UConverterImpl _HZImpl={ | 572 static const UConverterImpl _HZImpl={ |
| 538 | 573 |
| 539 UCNV_HZ, | 574 UCNV_HZ, |
| 540 | 575 |
| 541 NULL, | 576 NULL, |
| 542 NULL, | 577 NULL, |
| 543 | 578 |
| 544 _HZOpen, | 579 _HZOpen, |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 582 ~((uint32_t) 0), | 617 ~((uint32_t) 0), |
| 583 NULL, | 618 NULL, |
| 584 NULL, | 619 NULL, |
| 585 &_HZStaticData, | 620 &_HZStaticData, |
| 586 FALSE, | 621 FALSE, |
| 587 &_HZImpl, | 622 &_HZImpl, |
| 588 0 | 623 0 |
| 589 }; | 624 }; |
| 590 | 625 |
| 591 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 626 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
| OLD | NEW |