OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 1999-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: uinvchar.c |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:2 |
| 12 * |
| 13 * created on: 2004sep14 |
| 14 * created by: Markus W. Scherer |
| 15 * |
| 16 * Functions for handling invariant characters, moved here from putil.c |
| 17 * for better modularization. |
| 18 */ |
| 19 |
| 20 #include "unicode/utypes.h" |
| 21 #include "unicode/ustring.h" |
| 22 #include "udataswp.h" |
| 23 #include "cstring.h" |
| 24 #include "cmemory.h" |
| 25 #include "uassert.h" |
| 26 #include "uinvchar.h" |
| 27 |
| 28 /* invariant-character handling --------------------------------------------- */ |
| 29 |
| 30 /* |
| 31 * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h) |
| 32 * appropriately for most EBCDIC codepages. |
| 33 * |
| 34 * They currently also map most other ASCII graphic characters, |
| 35 * appropriately for codepages 37 and 1047. |
| 36 * Exceptions: The characters for []^ have different codes in 37 & 1047. |
| 37 * Both versions are mapped to ASCII. |
| 38 * |
| 39 * ASCII 37 1047 |
| 40 * [ 5B BA AD |
| 41 * ] 5D BB BD |
| 42 * ^ 5E B0 5F |
| 43 * |
| 44 * There are no mappings for variant characters from Unicode to EBCDIC. |
| 45 * |
| 46 * Currently, C0 control codes are also included in these maps. |
| 47 * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other |
| 48 * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A), |
| 49 * but there is no mapping for ASCII LF back to EBCDIC. |
| 50 * |
| 51 * ASCII EBCDIC S/390-OE |
| 52 * LF 0A 25 15 |
| 53 * NEL 85 15 25 |
| 54 * |
| 55 * The maps below explicitly exclude the variant |
| 56 * control and graphical characters that are in ASCII-based |
| 57 * codepages at 0x80 and above. |
| 58 * "No mapping" is expressed by mapping to a 00 byte. |
| 59 * |
| 60 * These tables do not establish a converter or a codepage. |
| 61 */ |
| 62 |
/*
 * EBCDIC -> ASCII map, indexed by the EBCDIC byte value.
 * A 0x00 entry at a nonzero index means "no mapping".
 * Each row below covers 16 consecutive EBCDIC codes; the comment gives the
 * first code of the row.
 */
static const uint8_t asciiFromEbcdic[256]={
    /* 00 */ 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 10 */ 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 20 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    /* 30 */ 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    /* 40 */ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    /* 50 */ 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    /* 60 */ 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    /* 70 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    /* 80 */ 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 90 */ 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* a0 */ 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    /* b0 */ 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    /* c0 */ 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* d0 */ 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* e0 */ 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* f0 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
| 84 |
/*
 * ASCII -> EBCDIC map, indexed by the ASCII byte value.
 * A 0x00 entry at a nonzero index means "no mapping"; in particular every
 * index from 0x80 upward is unmapped.
 * Each row below covers 16 consecutive ASCII codes; the comment gives the
 * first code of the row.
 */
static const uint8_t ebcdicFromAscii[256]={
    /* 00 */ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 10 */ 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 20 */ 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
    /* 30 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,

    /* 40 */ 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
    /* 50 */ 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
    /* 60 */ 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    /* 70 */ 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,

    /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* a0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* b0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    /* c0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* d0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* e0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* f0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
| 106 |
| 107 /* |
| 108 * Bit sets indicating which characters of the ASCII repertoire |
| 109 * (by ASCII/Unicode code) are "invariant". |
| 110 * See utypes.h for more details. |
| 111 * |
| 112 * As invariant are considered the characters of the ASCII repertoire except |
| 113 * for the following: |
| 114 * 21 '!' <exclamation mark> |
| 115 * 23 '#' <number sign> |
| 116 * 24 '$' <dollar sign> |
| 117 * |
| 118 * 40 '@' <commercial at> |
| 119 * |
| 120 * 5b '[' <left bracket> |
| 121 * 5c '\' <backslash> |
| 122 * 5d ']' <right bracket> |
| 123 * 5e '^' <circumflex> |
| 124 * |
| 125 * 60 '`' <grave accent> |
| 126 * |
| 127 * 7b '{' <left brace> |
| 128 * 7c '|' <vertical line> |
| 129 * 7d '}' <right brace> |
| 130 * 7e '~' <tilde> |
| 131 */ |
/*
 * One 32-bit word per group of 32 ASCII codes; bit (c & 0x1f) of word (c >> 5)
 * is set when ASCII code c is an invariant character.
 */
static const uint32_t invariantChars[4]={
    0xfffffbff,     /* 00..1f but not 0a */
    0xffffffe5,     /* 20..3f but not 21 23 24 */
    0x87fffffe,     /* 40..5f but not 40 5b..5e */
    0x87fffffe      /* 60..7f but not 60 7b..7e */
};
| 138 |
/*
 * Test unsigned types (or values known to be non-negative) for invariant
 * characters; tests ASCII-family character values.
 * The argument is looked up in the invariantChars bit set after a range
 * check against 0x7f.
 * Note: the argument is evaluated more than once, so it must not have
 * side effects.
 */
#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)

/* test signed types for invariant characters, adds test for positive values */
#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))

/*
 * Conversion between platform-charset byte values and ASCII/Unicode values.
 * On ASCII platforms both directions are the identity; on EBCDIC platforms
 * they are table lookups. The expansions are fully parenthesized so that
 * the macros can be used safely inside larger expressions and with
 * arguments that are themselves expressions.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#define CHAR_TO_UCHAR(c) (c)
#define UCHAR_TO_CHAR(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#define CHAR_TO_UCHAR(u) (asciiFromEbcdic[(u)])
#define UCHAR_TO_CHAR(u) (ebcdicFromAscii[(u)])
#else
#   error U_CHARSET_FAMILY is not valid
#endif
| 157 |
| 158 |
| 159 U_CAPI void U_EXPORT2 |
| 160 u_charsToUChars(const char *cs, UChar *us, int32_t length) { |
| 161 UChar u; |
| 162 uint8_t c; |
| 163 |
| 164 /* |
| 165 * Allow the entire ASCII repertoire to be mapped _to_ Unicode. |
| 166 * For EBCDIC systems, this works for characters with codes from |
| 167 * codepages 37 and 1047 or compatible. |
| 168 */ |
| 169 while(length>0) { |
| 170 c=(uint8_t)(*cs++); |
| 171 u=(UChar)CHAR_TO_UCHAR(c); |
| 172 U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */ |
| 173 *us++=u; |
| 174 --length; |
| 175 } |
| 176 } |
| 177 |
| 178 U_CAPI void U_EXPORT2 |
| 179 u_UCharsToChars(const UChar *us, char *cs, int32_t length) { |
| 180 UChar u; |
| 181 |
| 182 while(length>0) { |
| 183 u=*us++; |
| 184 if(!UCHAR_IS_INVARIANT(u)) { |
| 185 U_ASSERT(FALSE); /* Variant characters were used. These are not port
able in ICU. */ |
| 186 u=0; |
| 187 } |
| 188 *cs++=(char)UCHAR_TO_CHAR(u); |
| 189 --length; |
| 190 } |
| 191 } |
| 192 |
| 193 U_CAPI UBool U_EXPORT2 |
| 194 uprv_isInvariantString(const char *s, int32_t length) { |
| 195 uint8_t c; |
| 196 |
| 197 for(;;) { |
| 198 if(length<0) { |
| 199 /* NUL-terminated */ |
| 200 c=(uint8_t)*s++; |
| 201 if(c==0) { |
| 202 break; |
| 203 } |
| 204 } else { |
| 205 /* count length */ |
| 206 if(length==0) { |
| 207 break; |
| 208 } |
| 209 --length; |
| 210 c=(uint8_t)*s++; |
| 211 if(c==0) { |
| 212 continue; /* NUL is invariant */ |
| 213 } |
| 214 } |
| 215 /* c!=0 now, one branch below checks c==0 for variant characters */ |
| 216 |
| 217 /* |
| 218 * no assertions here because these functions are legitimately called |
| 219 * for strings with variant characters |
| 220 */ |
| 221 #if U_CHARSET_FAMILY==U_ASCII_FAMILY |
| 222 if(!UCHAR_IS_INVARIANT(c)) { |
| 223 return FALSE; /* found a variant char */ |
| 224 } |
| 225 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY |
| 226 c=CHAR_TO_UCHAR(c); |
| 227 if(c==0 || !UCHAR_IS_INVARIANT(c)) { |
| 228 return FALSE; /* found a variant char */ |
| 229 } |
| 230 #else |
| 231 # error U_CHARSET_FAMILY is not valid |
| 232 #endif |
| 233 } |
| 234 return TRUE; |
| 235 } |
| 236 |
| 237 U_CAPI UBool U_EXPORT2 |
| 238 uprv_isInvariantUString(const UChar *s, int32_t length) { |
| 239 UChar c; |
| 240 |
| 241 for(;;) { |
| 242 if(length<0) { |
| 243 /* NUL-terminated */ |
| 244 c=*s++; |
| 245 if(c==0) { |
| 246 break; |
| 247 } |
| 248 } else { |
| 249 /* count length */ |
| 250 if(length==0) { |
| 251 break; |
| 252 } |
| 253 --length; |
| 254 c=*s++; |
| 255 } |
| 256 |
| 257 /* |
| 258 * no assertions here because these functions are legitimately called |
| 259 * for strings with variant characters |
| 260 */ |
| 261 if(!UCHAR_IS_INVARIANT(c)) { |
| 262 return FALSE; /* found a variant char */ |
| 263 } |
| 264 } |
| 265 return TRUE; |
| 266 } |
| 267 |
| 268 /* UDataSwapFn implementations used in udataswp.c ------- */ |
| 269 |
| 270 /* convert ASCII to EBCDIC and verify that all characters are invariant */ |
| 271 U_CAPI int32_t U_EXPORT2 |
| 272 uprv_ebcdicFromAscii(const UDataSwapper *ds, |
| 273 const void *inData, int32_t length, void *outData, |
| 274 UErrorCode *pErrorCode) { |
| 275 const uint8_t *s; |
| 276 uint8_t *t; |
| 277 uint8_t c; |
| 278 |
| 279 int32_t count; |
| 280 |
| 281 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 282 return 0; |
| 283 } |
| 284 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { |
| 285 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 286 return 0; |
| 287 } |
| 288 |
| 289 /* setup and swapping */ |
| 290 s=(const uint8_t *)inData; |
| 291 t=(uint8_t *)outData; |
| 292 count=length; |
| 293 while(count>0) { |
| 294 c=*s++; |
| 295 if(!UCHAR_IS_INVARIANT(c)) { |
| 296 udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a v
ariant character in position %d\n", |
| 297 length, length-count); |
| 298 *pErrorCode=U_INVALID_CHAR_FOUND; |
| 299 return 0; |
| 300 } |
| 301 *t++=ebcdicFromAscii[c]; |
| 302 --count; |
| 303 } |
| 304 |
| 305 return length; |
| 306 } |
| 307 |
| 308 /* this function only checks and copies ASCII strings without conversion */ |
| 309 U_CFUNC int32_t |
| 310 uprv_copyAscii(const UDataSwapper *ds, |
| 311 const void *inData, int32_t length, void *outData, |
| 312 UErrorCode *pErrorCode) { |
| 313 const uint8_t *s; |
| 314 uint8_t c; |
| 315 |
| 316 int32_t count; |
| 317 |
| 318 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 319 return 0; |
| 320 } |
| 321 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { |
| 322 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 323 return 0; |
| 324 } |
| 325 |
| 326 /* setup and checking */ |
| 327 s=(const uint8_t *)inData; |
| 328 count=length; |
| 329 while(count>0) { |
| 330 c=*s++; |
| 331 if(!UCHAR_IS_INVARIANT(c)) { |
| 332 udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a var
iant character in position %d\n", |
| 333 length, length-count); |
| 334 *pErrorCode=U_INVALID_CHAR_FOUND; |
| 335 return 0; |
| 336 } |
| 337 --count; |
| 338 } |
| 339 |
| 340 if(length>0 && inData!=outData) { |
| 341 uprv_memcpy(outData, inData, length); |
| 342 } |
| 343 |
| 344 return length; |
| 345 } |
| 346 |
| 347 /* convert EBCDIC to ASCII and verify that all characters are invariant */ |
| 348 U_CFUNC int32_t |
| 349 uprv_asciiFromEbcdic(const UDataSwapper *ds, |
| 350 const void *inData, int32_t length, void *outData, |
| 351 UErrorCode *pErrorCode) { |
| 352 const uint8_t *s; |
| 353 uint8_t *t; |
| 354 uint8_t c; |
| 355 |
| 356 int32_t count; |
| 357 |
| 358 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 359 return 0; |
| 360 } |
| 361 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { |
| 362 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 363 return 0; |
| 364 } |
| 365 |
| 366 /* setup and swapping */ |
| 367 s=(const uint8_t *)inData; |
| 368 t=(uint8_t *)outData; |
| 369 count=length; |
| 370 while(count>0) { |
| 371 c=*s++; |
| 372 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { |
| 373 udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a v
ariant character in position %d\n", |
| 374 length, length-count); |
| 375 *pErrorCode=U_INVALID_CHAR_FOUND; |
| 376 return 0; |
| 377 } |
| 378 *t++=c; |
| 379 --count; |
| 380 } |
| 381 |
| 382 return length; |
| 383 } |
| 384 |
| 385 /* this function only checks and copies EBCDIC strings without conversion */ |
| 386 U_CFUNC int32_t |
| 387 uprv_copyEbcdic(const UDataSwapper *ds, |
| 388 const void *inData, int32_t length, void *outData, |
| 389 UErrorCode *pErrorCode) { |
| 390 const uint8_t *s; |
| 391 uint8_t c; |
| 392 |
| 393 int32_t count; |
| 394 |
| 395 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 396 return 0; |
| 397 } |
| 398 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { |
| 399 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 400 return 0; |
| 401 } |
| 402 |
| 403 /* setup and checking */ |
| 404 s=(const uint8_t *)inData; |
| 405 count=length; |
| 406 while(count>0) { |
| 407 c=*s++; |
| 408 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { |
| 409 udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant
character in position %d\n", |
| 410 length, length-count); |
| 411 *pErrorCode=U_INVALID_CHAR_FOUND; |
| 412 return 0; |
| 413 } |
| 414 --count; |
| 415 } |
| 416 |
| 417 if(length>0 && inData!=outData) { |
| 418 uprv_memcpy(outData, inData, length); |
| 419 } |
| 420 |
| 421 return length; |
| 422 } |
| 423 |
| 424 /* compare invariant strings; variant characters compare less than others and un
like each other */ |
| 425 U_CFUNC int32_t |
| 426 uprv_compareInvAscii(const UDataSwapper *ds, |
| 427 const char *outString, int32_t outLength, |
| 428 const UChar *localString, int32_t localLength) { |
| 429 int32_t minLength; |
| 430 UChar32 c1, c2; |
| 431 uint8_t c; |
| 432 |
| 433 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { |
| 434 return 0; |
| 435 } |
| 436 |
| 437 if(outLength<0) { |
| 438 outLength=(int32_t)uprv_strlen(outString); |
| 439 } |
| 440 if(localLength<0) { |
| 441 localLength=u_strlen(localString); |
| 442 } |
| 443 |
| 444 minLength= outLength<localLength ? outLength : localLength; |
| 445 |
| 446 while(minLength>0) { |
| 447 c=(uint8_t)*outString++; |
| 448 if(UCHAR_IS_INVARIANT(c)) { |
| 449 c1=c; |
| 450 } else { |
| 451 c1=-1; |
| 452 } |
| 453 |
| 454 c2=*localString++; |
| 455 if(!UCHAR_IS_INVARIANT(c2)) { |
| 456 c2=-2; |
| 457 } |
| 458 |
| 459 if((c1-=c2)!=0) { |
| 460 return c1; |
| 461 } |
| 462 |
| 463 --minLength; |
| 464 } |
| 465 |
| 466 /* strings start with same prefix, compare lengths */ |
| 467 return outLength-localLength; |
| 468 } |
| 469 |
| 470 U_CFUNC int32_t |
| 471 uprv_compareInvEbcdic(const UDataSwapper *ds, |
| 472 const char *outString, int32_t outLength, |
| 473 const UChar *localString, int32_t localLength) { |
| 474 int32_t minLength; |
| 475 UChar32 c1, c2; |
| 476 uint8_t c; |
| 477 |
| 478 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { |
| 479 return 0; |
| 480 } |
| 481 |
| 482 if(outLength<0) { |
| 483 outLength=(int32_t)uprv_strlen(outString); |
| 484 } |
| 485 if(localLength<0) { |
| 486 localLength=u_strlen(localString); |
| 487 } |
| 488 |
| 489 minLength= outLength<localLength ? outLength : localLength; |
| 490 |
| 491 while(minLength>0) { |
| 492 c=(uint8_t)*outString++; |
| 493 if(c==0) { |
| 494 c1=0; |
| 495 } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) { |
| 496 /* c1 is set */ |
| 497 } else { |
| 498 c1=-1; |
| 499 } |
| 500 |
| 501 c2=*localString++; |
| 502 if(!UCHAR_IS_INVARIANT(c2)) { |
| 503 c2=-2; |
| 504 } |
| 505 |
| 506 if((c1-=c2)!=0) { |
| 507 return c1; |
| 508 } |
| 509 |
| 510 --minLength; |
| 511 } |
| 512 |
| 513 /* strings start with same prefix, compare lengths */ |
| 514 return outLength-localLength; |
| 515 } |
| 516 |
| 517 U_CAPI int32_t U_EXPORT2 |
| 518 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { |
| 519 int32_t c1, c2; |
| 520 |
| 521 for(;; ++s1, ++s2) { |
| 522 c1=(uint8_t)*s1; |
| 523 c2=(uint8_t)*s2; |
| 524 if(c1!=c2) { |
| 525 if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))
) { |
| 526 c1=-(int32_t)(uint8_t)*s1; |
| 527 } |
| 528 if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))
) { |
| 529 c2=-(int32_t)(uint8_t)*s2; |
| 530 } |
| 531 return c1-c2; |
| 532 } else if(c1==0) { |
| 533 return 0; |
| 534 } |
| 535 } |
| 536 } |
| 537 |
| 538 |
| 539 U_INTERNAL uint8_t* U_EXPORT2 |
| 540 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n) |
| 541 { |
| 542 uint8_t *orig_dst = dst; |
| 543 |
| 544 if(n==-1) { |
| 545 n = uprv_strlen((const char*)src)+1; /* copy NUL */ |
| 546 } |
| 547 /* copy non-null */ |
| 548 while(*src && n>0) { |
| 549 *(dst++) = asciiFromEbcdic[*(src++)]; |
| 550 n--; |
| 551 } |
| 552 /* pad */ |
| 553 while(n>0) { |
| 554 *(dst++) = 0; |
| 555 n--; |
| 556 } |
| 557 return orig_dst; |
| 558 } |
| 559 |
| 560 U_INTERNAL uint8_t* U_EXPORT2 |
| 561 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n) |
| 562 { |
| 563 uint8_t *orig_dst = dst; |
| 564 |
| 565 if(n==-1) { |
| 566 n = uprv_strlen((const char*)src)+1; /* copy NUL */ |
| 567 } |
| 568 /* copy non-null */ |
| 569 while(*src && n>0) { |
| 570 char ch = ebcdicFromAscii[*(src++)]; |
| 571 if(ch == 0) { |
| 572 ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */ |
| 573 } |
| 574 *(dst++) = ch; |
| 575 n--; |
| 576 } |
| 577 /* pad */ |
| 578 while(n>0) { |
| 579 *(dst++) = 0; |
| 580 n--; |
| 581 } |
| 582 return orig_dst; |
| 583 } |
| 584 |
OLD | NEW |