OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2003-2009, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: ucm.c |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2003jun20 |
| 14 * created by: Markus W. Scherer |
| 15 * |
| 16 * This file reads a .ucm file, stores its mappings and sorts them. |
| 17 * It implements handling of Unicode conversion mappings from .ucm files |
| 18 * for makeconv, canonucm, rptp2ucm, etc. |
| 19 * |
| 20 * Unicode code point sequences with a length of more than 1, |
| 21 * as well as byte sequences with more than 4 bytes or more than one complete |
| 22 * character sequence are handled to support m:n mappings. |
| 23 */ |
| 24 |
| 25 #include "unicode/utypes.h" |
| 26 #include "unicode/ustring.h" |
| 27 #include "cstring.h" |
| 28 #include "cmemory.h" |
| 29 #include "filestrm.h" |
| 30 #include "uarrsort.h" |
| 31 #include "ucnvmbcs.h" |
| 32 #include "ucnv_bld.h" |
| 33 #include "ucnv_ext.h" |
| 34 #include "uparse.h" |
| 35 #include "ucm.h" |
| 36 #include <stdio.h> |
| 37 |
| 38 #if !UCONFIG_NO_CONVERSION |
| 39 |
| 40 /* -------------------------------------------------------------------------- */ |
| 41 |
| 42 static void |
| 43 printMapping(UCMapping *m, UChar32 *codePoints, uint8_t *bytes, FILE *f) { |
| 44 int32_t j; |
| 45 |
| 46 for(j=0; j<m->uLen; ++j) { |
| 47 fprintf(f, "<U%04lX>", (long)codePoints[j]); |
| 48 } |
| 49 |
| 50 fputc(' ', f); |
| 51 |
| 52 for(j=0; j<m->bLen; ++j) { |
| 53 fprintf(f, "\\x%02X", bytes[j]); |
| 54 } |
| 55 |
| 56 if(m->f>=0) { |
| 57 fprintf(f, " |%u\n", m->f); |
| 58 } else { |
| 59 fputs("\n", f); |
| 60 } |
| 61 } |
| 62 |
| 63 U_CAPI void U_EXPORT2 |
| 64 ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f) { |
| 65 printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), f); |
| 66 } |
| 67 |
| 68 U_CAPI void U_EXPORT2 |
| 69 ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode) { |
| 70 UCMapping *m; |
| 71 int32_t i, length; |
| 72 |
| 73 m=table->mappings; |
| 74 length=table->mappingsLength; |
| 75 if(byUnicode) { |
| 76 for(i=0; i<length; ++m, ++i) { |
| 77 ucm_printMapping(table, m, f); |
| 78 } |
| 79 } else { |
| 80 const int32_t *map=table->reverseMap; |
| 81 for(i=0; i<length; ++i) { |
| 82 ucm_printMapping(table, m+map[i], f); |
| 83 } |
| 84 } |
| 85 } |
| 86 |
| 87 /* mapping comparisons ------------------------------------------------------ */ |
| 88 |
| 89 static int32_t |
| 90 compareUnicode(UCMTable *lTable, const UCMapping *l, |
| 91 UCMTable *rTable, const UCMapping *r) { |
| 92 const UChar32 *lu, *ru; |
| 93 int32_t result, i, length; |
| 94 |
| 95 if(l->uLen==1 && r->uLen==1) { |
| 96 /* compare two single code points */ |
| 97 return l->u-r->u; |
| 98 } |
| 99 |
| 100 /* get pointers to the code point sequences */ |
| 101 lu=UCM_GET_CODE_POINTS(lTable, l); |
| 102 ru=UCM_GET_CODE_POINTS(rTable, r); |
| 103 |
| 104 /* get the minimum length */ |
| 105 if(l->uLen<=r->uLen) { |
| 106 length=l->uLen; |
| 107 } else { |
| 108 length=r->uLen; |
| 109 } |
| 110 |
| 111 /* compare the code points */ |
| 112 for(i=0; i<length; ++i) { |
| 113 result=lu[i]-ru[i]; |
| 114 if(result!=0) { |
| 115 return result; |
| 116 } |
| 117 } |
| 118 |
| 119 /* compare the lengths */ |
| 120 return l->uLen-r->uLen; |
| 121 } |
| 122 |
| 123 static int32_t |
| 124 compareBytes(UCMTable *lTable, const UCMapping *l, |
| 125 UCMTable *rTable, const UCMapping *r, |
| 126 UBool lexical) { |
| 127 const uint8_t *lb, *rb; |
| 128 int32_t result, i, length; |
| 129 |
| 130 /* |
| 131 * A lexical comparison is used for sorting in the builder, to allow |
| 132 * an efficient search for a byte sequence that could be a prefix |
| 133 * of a previously entered byte sequence. |
| 134 * |
| 135 * Comparing by lengths first is for compatibility with old .ucm tools |
| 136 * like canonucm and rptp2ucm. |
| 137 */ |
| 138 if(lexical) { |
| 139 /* get the minimum length and continue */ |
| 140 if(l->bLen<=r->bLen) { |
| 141 length=l->bLen; |
| 142 } else { |
| 143 length=r->bLen; |
| 144 } |
| 145 } else { |
| 146 /* compare lengths first */ |
| 147 result=l->bLen-r->bLen; |
| 148 if(result!=0) { |
| 149 return result; |
| 150 } else { |
| 151 length=l->bLen; |
| 152 } |
| 153 } |
| 154 |
| 155 /* get pointers to the byte sequences */ |
| 156 lb=UCM_GET_BYTES(lTable, l); |
| 157 rb=UCM_GET_BYTES(rTable, r); |
| 158 |
| 159 /* compare the bytes */ |
| 160 for(i=0; i<length; ++i) { |
| 161 result=lb[i]-rb[i]; |
| 162 if(result!=0) { |
| 163 return result; |
| 164 } |
| 165 } |
| 166 |
| 167 /* compare the lengths */ |
| 168 return l->bLen-r->bLen; |
| 169 } |
| 170 |
| 171 /* compare UCMappings for sorting */ |
| 172 static int32_t |
| 173 compareMappings(UCMTable *lTable, const UCMapping *l, |
| 174 UCMTable *rTable, const UCMapping *r, |
| 175 UBool uFirst) { |
| 176 int32_t result; |
| 177 |
| 178 /* choose which side to compare first */ |
| 179 if(uFirst) { |
| 180 /* Unicode then bytes */ |
| 181 result=compareUnicode(lTable, l, rTable, r); |
| 182 if(result==0) { |
| 183 result=compareBytes(lTable, l, rTable, r, FALSE); /* not lexically,
like canonucm */ |
| 184 } |
| 185 } else { |
| 186 /* bytes then Unicode */ |
| 187 result=compareBytes(lTable, l, rTable, r, TRUE); /* lexically, for build
er */ |
| 188 if(result==0) { |
| 189 result=compareUnicode(lTable, l, rTable, r); |
| 190 } |
| 191 } |
| 192 |
| 193 if(result!=0) { |
| 194 return result; |
| 195 } |
| 196 |
| 197 /* compare the flags */ |
| 198 return l->f-r->f; |
| 199 } |
| 200 |
| 201 /* sorting by Unicode first sorts mappings directly */ |
| 202 static int32_t |
| 203 compareMappingsUnicodeFirst(const void *context, const void *left, const void *r
ight) { |
| 204 return compareMappings( |
| 205 (UCMTable *)context, (const UCMapping *)left, |
| 206 (UCMTable *)context, (const UCMapping *)right, TRUE); |
| 207 } |
| 208 |
| 209 /* sorting by bytes first sorts the reverseMap; use indirection to mappings */ |
| 210 static int32_t |
| 211 compareMappingsBytesFirst(const void *context, const void *left, const void *rig
ht) { |
| 212 UCMTable *table=(UCMTable *)context; |
| 213 int32_t l=*(const int32_t *)left, r=*(const int32_t *)right; |
| 214 return compareMappings( |
| 215 table, table->mappings+l, |
| 216 table, table->mappings+r, FALSE); |
| 217 } |
| 218 |
| 219 U_CAPI void U_EXPORT2 |
| 220 ucm_sortTable(UCMTable *t) { |
| 221 UErrorCode errorCode; |
| 222 int32_t i; |
| 223 |
| 224 if(t->isSorted) { |
| 225 return; |
| 226 } |
| 227 |
| 228 errorCode=U_ZERO_ERROR; |
| 229 |
| 230 /* 1. sort by Unicode first */ |
| 231 uprv_sortArray(t->mappings, t->mappingsLength, sizeof(UCMapping), |
| 232 compareMappingsUnicodeFirst, t, |
| 233 FALSE, &errorCode); |
| 234 |
| 235 /* build the reverseMap */ |
| 236 if(t->reverseMap==NULL) { |
| 237 /* |
| 238 * allocate mappingsCapacity instead of mappingsLength so that |
| 239 * if mappings are added, the reverseMap need not be |
| 240 * reallocated each time |
| 241 * (see ucm_moveMappings() and ucm_addMapping()) |
| 242 */ |
| 243 t->reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t)
); |
| 244 if(t->reverseMap==NULL) { |
| 245 fprintf(stderr, "ucm error: unable to allocate reverseMap\n"); |
| 246 exit(U_MEMORY_ALLOCATION_ERROR); |
| 247 } |
| 248 } |
| 249 for(i=0; i<t->mappingsLength; ++i) { |
| 250 t->reverseMap[i]=i; |
| 251 } |
| 252 |
| 253 /* 2. sort reverseMap by mappings bytes first */ |
| 254 uprv_sortArray(t->reverseMap, t->mappingsLength, sizeof(int32_t), |
| 255 compareMappingsBytesFirst, t, |
| 256 FALSE, &errorCode); |
| 257 |
| 258 if(U_FAILURE(errorCode)) { |
| 259 fprintf(stderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n", |
| 260 u_errorName(errorCode)); |
| 261 exit(errorCode); |
| 262 } |
| 263 |
| 264 t->isSorted=TRUE; |
| 265 } |
| 266 |
| 267 /* |
| 268 * remove mappings with their move flag set from the base table |
| 269 * and move some of them (with UCM_MOVE_TO_EXT) to the extension table |
| 270 */ |
| 271 U_CAPI void U_EXPORT2 |
| 272 ucm_moveMappings(UCMTable *base, UCMTable *ext) { |
| 273 UCMapping *mb, *mbLimit; |
| 274 int8_t flag; |
| 275 |
| 276 mb=base->mappings; |
| 277 mbLimit=mb+base->mappingsLength; |
| 278 |
| 279 while(mb<mbLimit) { |
| 280 flag=mb->moveFlag; |
| 281 if(flag!=0) { |
| 282 /* reset the move flag */ |
| 283 mb->moveFlag=0; |
| 284 |
| 285 if(ext!=NULL && (flag&UCM_MOVE_TO_EXT)) { |
| 286 /* add the mapping to the extension table */ |
| 287 ucm_addMapping(ext, mb, UCM_GET_CODE_POINTS(base, mb), UCM_GET_B
YTES(base, mb)); |
| 288 } |
| 289 |
| 290 /* remove this mapping: move the last base mapping down and overwrit
e the current one */ |
| 291 if(mb<(mbLimit-1)) { |
| 292 uprv_memcpy(mb, mbLimit-1, sizeof(UCMapping)); |
| 293 } |
| 294 --mbLimit; |
| 295 --base->mappingsLength; |
| 296 base->isSorted=FALSE; |
| 297 } else { |
| 298 ++mb; |
| 299 } |
| 300 } |
| 301 } |
| 302 |
/* bit flags combined into the results of the checkBaseExt...() functions */
enum {
    /* at least one mapping had its moveFlag set */
    NEEDS_MOVE=1,
    /* at least one error was reported */
    HAS_ERRORS=2
};
| 307 |
| 308 static uint8_t |
| 309 checkBaseExtUnicode(UCMStates *baseStates, UCMTable *base, UCMTable *ext, |
| 310 UBool moveToExt, UBool intersectBase) { |
| 311 UCMapping *mb, *me, *mbLimit, *meLimit; |
| 312 int32_t cmp; |
| 313 uint8_t result; |
| 314 |
| 315 mb=base->mappings; |
| 316 mbLimit=mb+base->mappingsLength; |
| 317 |
| 318 me=ext->mappings; |
| 319 meLimit=me+ext->mappingsLength; |
| 320 |
| 321 result=0; |
| 322 |
| 323 for(;;) { |
| 324 /* skip irrelevant mappings on both sides */ |
| 325 for(;;) { |
| 326 if(mb==mbLimit) { |
| 327 return result; |
| 328 } |
| 329 |
| 330 if(0<=mb->f && mb->f<=2) { |
| 331 break; |
| 332 } |
| 333 |
| 334 ++mb; |
| 335 } |
| 336 |
| 337 for(;;) { |
| 338 if(me==meLimit) { |
| 339 return result; |
| 340 } |
| 341 |
| 342 if(0<=me->f && me->f<=2) { |
| 343 break; |
| 344 } |
| 345 |
| 346 ++me; |
| 347 } |
| 348 |
| 349 /* compare the base and extension mappings */ |
| 350 cmp=compareUnicode(base, mb, ext, me); |
| 351 if(cmp<0) { |
| 352 if(intersectBase && (intersectBase!=2 || mb->bLen>1)) { |
| 353 /* |
| 354 * mapping in base but not in ext, move it |
| 355 * |
| 356 * if ext is DBCS, move DBCS mappings here |
| 357 * and check SBCS ones for Unicode prefix below |
| 358 */ |
| 359 mb->moveFlag|=UCM_MOVE_TO_EXT; |
| 360 result|=NEEDS_MOVE; |
| 361 |
| 362 /* does mb map from an input sequence that is a prefix of me's? */ |
| 363 } else if( mb->uLen<me->uLen && |
| 364 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINT
S(ext, me), 4*mb->uLen) |
| 365 ) { |
| 366 if(moveToExt) { |
| 367 /* mark this mapping to be moved to the extension table */ |
| 368 mb->moveFlag|=UCM_MOVE_TO_EXT; |
| 369 result|=NEEDS_MOVE; |
| 370 } else { |
| 371 fprintf(stderr, |
| 372 "ucm error: the base table contains a mapping whose
input sequence\n" |
| 373 " is a prefix of the input sequence of an
extension mapping\n"); |
| 374 ucm_printMapping(base, mb, stderr); |
| 375 ucm_printMapping(ext, me, stderr); |
| 376 result|=HAS_ERRORS; |
| 377 } |
| 378 } |
| 379 |
| 380 ++mb; |
| 381 } else if(cmp==0) { |
| 382 /* |
| 383 * same output: remove the extension mapping, |
| 384 * otherwise treat as an error |
| 385 */ |
| 386 if( mb->f==me->f && mb->bLen==me->bLen && |
| 387 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me),
mb->bLen) |
| 388 ) { |
| 389 me->moveFlag|=UCM_REMOVE_MAPPING; |
| 390 result|=NEEDS_MOVE; |
| 391 } else if(intersectBase) { |
| 392 /* mapping in base but not in ext, move it */ |
| 393 mb->moveFlag|=UCM_MOVE_TO_EXT; |
| 394 result|=NEEDS_MOVE; |
| 395 } else { |
| 396 fprintf(stderr, |
| 397 "ucm error: the base table contains a mapping whose inpu
t sequence\n" |
| 398 " is the same as the input sequence of an exte
nsion mapping\n" |
| 399 " but it maps differently\n"); |
| 400 ucm_printMapping(base, mb, stderr); |
| 401 ucm_printMapping(ext, me, stderr); |
| 402 result|=HAS_ERRORS; |
| 403 } |
| 404 |
| 405 ++mb; |
| 406 } else /* cmp>0 */ { |
| 407 ++me; |
| 408 } |
| 409 } |
| 410 } |
| 411 |
| 412 static uint8_t |
| 413 checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, |
| 414 UBool moveToExt, UBool intersectBase) { |
| 415 UCMapping *mb, *me; |
| 416 int32_t *baseMap, *extMap; |
| 417 int32_t b, e, bLimit, eLimit, cmp; |
| 418 uint8_t result; |
| 419 UBool isSISO; |
| 420 |
| 421 baseMap=base->reverseMap; |
| 422 extMap=ext->reverseMap; |
| 423 |
| 424 b=e=0; |
| 425 bLimit=base->mappingsLength; |
| 426 eLimit=ext->mappingsLength; |
| 427 |
| 428 result=0; |
| 429 |
| 430 isSISO=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO); |
| 431 |
| 432 for(;;) { |
| 433 /* skip irrelevant mappings on both sides */ |
| 434 for(;; ++b) { |
| 435 if(b==bLimit) { |
| 436 return result; |
| 437 } |
| 438 mb=base->mappings+baseMap[b]; |
| 439 |
| 440 if(intersectBase==2 && mb->bLen==1) { |
| 441 /* |
| 442 * comparing a base against a DBCS extension: |
| 443 * leave SBCS base mappings alone |
| 444 */ |
| 445 continue; |
| 446 } |
| 447 |
| 448 if(mb->f==0 || mb->f==3) { |
| 449 break; |
| 450 } |
| 451 } |
| 452 |
| 453 for(;;) { |
| 454 if(e==eLimit) { |
| 455 return result; |
| 456 } |
| 457 me=ext->mappings+extMap[e]; |
| 458 |
| 459 if(me->f==0 || me->f==3) { |
| 460 break; |
| 461 } |
| 462 |
| 463 ++e; |
| 464 } |
| 465 |
| 466 /* compare the base and extension mappings */ |
| 467 cmp=compareBytes(base, mb, ext, me, TRUE); |
| 468 if(cmp<0) { |
| 469 if(intersectBase) { |
| 470 /* mapping in base but not in ext, move it */ |
| 471 mb->moveFlag|=UCM_MOVE_TO_EXT; |
| 472 result|=NEEDS_MOVE; |
| 473 |
| 474 /* |
| 475 * does mb map from an input sequence that is a prefix of me's? |
| 476 * for SI/SO tables, a single byte is never a prefix because it |
| 477 * occurs in a separate single-byte state |
| 478 */ |
| 479 } else if( mb->bLen<me->bLen && |
| 480 (!isSISO || mb->bLen>1) && |
| 481 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me),
mb->bLen) |
| 482 ) { |
| 483 if(moveToExt) { |
| 484 /* mark this mapping to be moved to the extension table */ |
| 485 mb->moveFlag|=UCM_MOVE_TO_EXT; |
| 486 result|=NEEDS_MOVE; |
| 487 } else { |
| 488 fprintf(stderr, |
| 489 "ucm error: the base table contains a mapping whose
input sequence\n" |
| 490 " is a prefix of the input sequence of an
extension mapping\n"); |
| 491 ucm_printMapping(base, mb, stderr); |
| 492 ucm_printMapping(ext, me, stderr); |
| 493 result|=HAS_ERRORS; |
| 494 } |
| 495 } |
| 496 |
| 497 ++b; |
| 498 } else if(cmp==0) { |
| 499 /* |
| 500 * same output: remove the extension mapping, |
| 501 * otherwise treat as an error |
| 502 */ |
| 503 if( mb->f==me->f && mb->uLen==me->uLen && |
| 504 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINT
S(ext, me), 4*mb->uLen) |
| 505 ) { |
| 506 me->moveFlag|=UCM_REMOVE_MAPPING; |
| 507 result|=NEEDS_MOVE; |
| 508 } else if(intersectBase) { |
| 509 /* mapping in base but not in ext, move it */ |
| 510 mb->moveFlag|=UCM_MOVE_TO_EXT; |
| 511 result|=NEEDS_MOVE; |
| 512 } else { |
| 513 fprintf(stderr, |
| 514 "ucm error: the base table contains a mapping whose inpu
t sequence\n" |
| 515 " is the same as the input sequence of an exte
nsion mapping\n" |
| 516 " but it maps differently\n"); |
| 517 ucm_printMapping(base, mb, stderr); |
| 518 ucm_printMapping(ext, me, stderr); |
| 519 result|=HAS_ERRORS; |
| 520 } |
| 521 |
| 522 ++b; |
| 523 } else /* cmp>0 */ { |
| 524 ++e; |
| 525 } |
| 526 } |
| 527 } |
| 528 |
| 529 U_CAPI UBool U_EXPORT2 |
| 530 ucm_checkValidity(UCMTable *table, UCMStates *baseStates) { |
| 531 UCMapping *m, *mLimit; |
| 532 int32_t count; |
| 533 UBool isOK; |
| 534 |
| 535 m=table->mappings; |
| 536 mLimit=m+table->mappingsLength; |
| 537 isOK=TRUE; |
| 538 |
| 539 while(m<mLimit) { |
| 540 count=ucm_countChars(baseStates, UCM_GET_BYTES(table, m), m->bLen); |
| 541 if(count<1) { |
| 542 ucm_printMapping(table, m, stderr); |
| 543 isOK=FALSE; |
| 544 } |
| 545 ++m; |
| 546 } |
| 547 |
| 548 return isOK; |
| 549 } |
| 550 |
| 551 U_CAPI UBool U_EXPORT2 |
| 552 ucm_checkBaseExt(UCMStates *baseStates, |
| 553 UCMTable *base, UCMTable *ext, UCMTable *moveTarget, |
| 554 UBool intersectBase) { |
| 555 uint8_t result; |
| 556 |
| 557 /* if we have an extension table, we must always use precision flags */ |
| 558 if(base->flagsType&UCM_FLAGS_IMPLICIT) { |
| 559 fprintf(stderr, "ucm error: the base table contains mappings without pre
cision flags\n"); |
| 560 return FALSE; |
| 561 } |
| 562 if(ext->flagsType&UCM_FLAGS_IMPLICIT) { |
| 563 fprintf(stderr, "ucm error: extension table contains mappings without pr
ecision flags\n"); |
| 564 return FALSE; |
| 565 } |
| 566 |
| 567 /* checking requires both tables to be sorted */ |
| 568 ucm_sortTable(base); |
| 569 ucm_sortTable(ext); |
| 570 |
| 571 /* check */ |
| 572 result= |
| 573 checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=NULL), in
tersectBase)| |
| 574 checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=NULL), inte
rsectBase); |
| 575 |
| 576 if(result&HAS_ERRORS) { |
| 577 return FALSE; |
| 578 } |
| 579 |
| 580 if(result&NEEDS_MOVE) { |
| 581 ucm_moveMappings(ext, NULL); |
| 582 ucm_moveMappings(base, moveTarget); |
| 583 ucm_sortTable(base); |
| 584 ucm_sortTable(ext); |
| 585 if(moveTarget!=NULL) { |
| 586 ucm_sortTable(moveTarget); |
| 587 } |
| 588 } |
| 589 |
| 590 return TRUE; |
| 591 } |
| 592 |
| 593 /* merge tables for rptp2ucm ------------------------------------------------ */ |
| 594 |
| 595 U_CAPI void U_EXPORT2 |
| 596 ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable, |
| 597 const uint8_t *subchar, int32_t subcharLength, |
| 598 uint8_t subchar1) { |
| 599 UCMapping *fromUMapping, *toUMapping; |
| 600 int32_t fromUIndex, toUIndex, fromUTop, toUTop, cmp; |
| 601 |
| 602 ucm_sortTable(fromUTable); |
| 603 ucm_sortTable(toUTable); |
| 604 |
| 605 fromUMapping=fromUTable->mappings; |
| 606 toUMapping=toUTable->mappings; |
| 607 |
| 608 fromUTop=fromUTable->mappingsLength; |
| 609 toUTop=toUTable->mappingsLength; |
| 610 |
| 611 fromUIndex=toUIndex=0; |
| 612 |
| 613 while(fromUIndex<fromUTop && toUIndex<toUTop) { |
| 614 cmp=compareMappings(fromUTable, fromUMapping, toUTable, toUMapping, TRUE
); |
| 615 if(cmp==0) { |
| 616 /* equal: roundtrip, nothing to do (flags are initially 0) */ |
| 617 ++fromUMapping; |
| 618 ++toUMapping; |
| 619 |
| 620 ++fromUIndex; |
| 621 ++toUIndex; |
| 622 } else if(cmp<0) { |
| 623 /* |
| 624 * the fromU mapping does not have a toU counterpart: |
| 625 * fallback Unicode->codepage |
| 626 */ |
| 627 if( (fromUMapping->bLen==subcharLength && |
| 628 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar
, subcharLength)) || |
| 629 (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0
]==subchar1) |
| 630 ) { |
| 631 fromUMapping->f=2; /* SUB mapping */ |
| 632 } else { |
| 633 fromUMapping->f=1; /* normal fallback */ |
| 634 } |
| 635 |
| 636 ++fromUMapping; |
| 637 ++fromUIndex; |
| 638 } else { |
| 639 /* |
| 640 * the toU mapping does not have a fromU counterpart: |
| 641 * (reverse) fallback codepage->Unicode, copy it to the fromU table |
| 642 */ |
| 643 |
| 644 /* ignore reverse fallbacks to Unicode SUB */ |
| 645 if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u=
=0x1a))) { |
| 646 toUMapping->f=3; /* reverse fallback */ |
| 647 ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTa
ble, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); |
| 648 |
| 649 /* the table may have been reallocated */ |
| 650 fromUMapping=fromUTable->mappings+fromUIndex; |
| 651 } |
| 652 |
| 653 ++toUMapping; |
| 654 ++toUIndex; |
| 655 } |
| 656 } |
| 657 |
| 658 /* either one or both tables are exhausted */ |
| 659 while(fromUIndex<fromUTop) { |
| 660 /* leftover fromU mappings are fallbacks */ |
| 661 if( (fromUMapping->bLen==subcharLength && |
| 662 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, su
bcharLength)) || |
| 663 (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==s
ubchar1) |
| 664 ) { |
| 665 fromUMapping->f=2; /* SUB mapping */ |
| 666 } else { |
| 667 fromUMapping->f=1; /* normal fallback */ |
| 668 } |
| 669 |
| 670 ++fromUMapping; |
| 671 ++fromUIndex; |
| 672 } |
| 673 |
| 674 while(toUIndex<toUTop) { |
| 675 /* leftover toU mappings are reverse fallbacks */ |
| 676 |
| 677 /* ignore reverse fallbacks to Unicode SUB */ |
| 678 if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1
a))) { |
| 679 toUMapping->f=3; /* reverse fallback */ |
| 680 ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable,
toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); |
| 681 } |
| 682 |
| 683 ++toUMapping; |
| 684 ++toUIndex; |
| 685 } |
| 686 |
| 687 fromUTable->isSorted=FALSE; |
| 688 } |
| 689 |
| 690 /* separate extension mappings out of base table for rptp2ucm --------------- */ |
| 691 |
| 692 U_CAPI UBool U_EXPORT2 |
| 693 ucm_separateMappings(UCMFile *ucm, UBool isSISO) { |
| 694 UCMTable *table; |
| 695 UCMapping *m, *mLimit; |
| 696 int32_t type; |
| 697 UBool needsMove, isOK; |
| 698 |
| 699 table=ucm->base; |
| 700 m=table->mappings; |
| 701 mLimit=m+table->mappingsLength; |
| 702 |
| 703 needsMove=FALSE; |
| 704 isOK=TRUE; |
| 705 |
| 706 for(; m<mLimit; ++m) { |
| 707 if(isSISO && m->bLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) { |
| 708 fprintf(stderr, "warning: removing illegal mapping from an SI/SO-sta
teful table\n"); |
| 709 ucm_printMapping(table, m, stderr); |
| 710 m->moveFlag|=UCM_REMOVE_MAPPING; |
| 711 needsMove=TRUE; |
| 712 continue; |
| 713 } |
| 714 |
| 715 type=ucm_mappingType( |
| 716 &ucm->states, m, |
| 717 UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m)); |
| 718 if(type<0) { |
| 719 /* illegal byte sequence */ |
| 720 printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table,
m), stderr); |
| 721 isOK=FALSE; |
| 722 } else if(type>0) { |
| 723 m->moveFlag|=UCM_MOVE_TO_EXT; |
| 724 needsMove=TRUE; |
| 725 } |
| 726 } |
| 727 |
| 728 if(!isOK) { |
| 729 return FALSE; |
| 730 } |
| 731 if(needsMove) { |
| 732 ucm_moveMappings(ucm->base, ucm->ext); |
| 733 return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FAL
SE); |
| 734 } else { |
| 735 ucm_sortTable(ucm->base); |
| 736 return TRUE; |
| 737 } |
| 738 } |
| 739 |
| 740 /* ucm parser --------------------------------------------------------------- */ |
| 741 |
| 742 U_CAPI int8_t U_EXPORT2 |
| 743 ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char *
*ps) { |
| 744 const char *s=*ps; |
| 745 char *end; |
| 746 uint8_t byte; |
| 747 int8_t bLen; |
| 748 |
| 749 bLen=0; |
| 750 for(;;) { |
| 751 /* skip an optional plus sign */ |
| 752 if(bLen>0 && *s=='+') { |
| 753 ++s; |
| 754 } |
| 755 if(*s!='\\') { |
| 756 break; |
| 757 } |
| 758 |
| 759 if( s[1]!='x' || |
| 760 (byte=(uint8_t)uprv_strtoul(s+2, &end, 16), end)!=s+4 |
| 761 ) { |
| 762 fprintf(stderr, "ucm error: byte must be formatted as \\xXX (2 hex d
igits) - \"%s\"\n", line); |
| 763 return -1; |
| 764 } |
| 765 |
| 766 if(bLen==UCNV_EXT_MAX_BYTES) { |
| 767 fprintf(stderr, "ucm error: too many bytes on \"%s\"\n", line); |
| 768 return -1; |
| 769 } |
| 770 bytes[bLen++]=byte; |
| 771 s=end; |
| 772 } |
| 773 |
| 774 *ps=s; |
| 775 return bLen; |
| 776 } |
| 777 |
| 778 /* parse a mapping line; must not be empty */ |
| 779 U_CAPI UBool U_EXPORT2 |
| 780 ucm_parseMappingLine(UCMapping *m, |
| 781 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], |
| 782 uint8_t bytes[UCNV_EXT_MAX_BYTES], |
| 783 const char *line) { |
| 784 const char *s; |
| 785 char *end; |
| 786 UChar32 cp; |
| 787 int32_t u16Length; |
| 788 int8_t uLen, bLen, f; |
| 789 |
| 790 s=line; |
| 791 uLen=bLen=0; |
| 792 |
| 793 /* parse code points */ |
| 794 for(;;) { |
| 795 /* skip an optional plus sign */ |
| 796 if(uLen>0 && *s=='+') { |
| 797 ++s; |
| 798 } |
| 799 if(*s!='<') { |
| 800 break; |
| 801 } |
| 802 |
| 803 if( s[1]!='U' || |
| 804 (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 || |
| 805 *end!='>' |
| 806 ) { |
| 807 fprintf(stderr, "ucm error: Unicode code point must be formatted as
<UXXXX> (1..6 hex digits) - \"%s\"\n", line); |
| 808 return FALSE; |
| 809 } |
| 810 if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) { |
| 811 fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e0
00..10ffff - \"%s\"\n", line); |
| 812 return FALSE; |
| 813 } |
| 814 |
| 815 if(uLen==UCNV_EXT_MAX_UCHARS) { |
| 816 fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line)
; |
| 817 return FALSE; |
| 818 } |
| 819 codePoints[uLen++]=cp; |
| 820 s=end+1; |
| 821 } |
| 822 |
| 823 if(uLen==0) { |
| 824 fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line); |
| 825 return FALSE; |
| 826 } else if(uLen==1) { |
| 827 m->u=codePoints[0]; |
| 828 } else { |
| 829 UErrorCode errorCode=U_ZERO_ERROR; |
| 830 u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode); |
| 831 if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) || |
| 832 u16Length>UCNV_EXT_MAX_UCHARS |
| 833 ) { |
| 834 fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line); |
| 835 return FALSE; |
| 836 } |
| 837 } |
| 838 |
| 839 s=u_skipWhitespace(s); |
| 840 |
| 841 /* parse bytes */ |
| 842 bLen=ucm_parseBytes(bytes, line, &s); |
| 843 |
| 844 if(bLen<0) { |
| 845 return FALSE; |
| 846 } else if(bLen==0) { |
| 847 fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line); |
| 848 return FALSE; |
| 849 } else if(bLen<=4) { |
| 850 uprv_memcpy(m->b.bytes, bytes, bLen); |
| 851 } |
| 852 |
| 853 /* skip everything until the fallback indicator, even the start of a comment
*/ |
| 854 for(;;) { |
| 855 if(*s==0) { |
| 856 f=-1; /* no fallback indicator */ |
| 857 break; |
| 858 } else if(*s=='|') { |
| 859 f=(int8_t)(s[1]-'0'); |
| 860 if((uint8_t)f>3) { |
| 861 fprintf(stderr, "ucm error: fallback indicator must be |0..|3 -
\"%s\"\n", line); |
| 862 return FALSE; |
| 863 } |
| 864 break; |
| 865 } |
| 866 ++s; |
| 867 } |
| 868 |
| 869 m->uLen=uLen; |
| 870 m->bLen=bLen; |
| 871 m->f=f; |
| 872 return TRUE; |
| 873 } |
| 874 |
| 875 /* general APIs ------------------------------------------------------------- */ |
| 876 |
| 877 U_CAPI UCMTable * U_EXPORT2 |
| 878 ucm_openTable() { |
| 879 UCMTable *table=(UCMTable *)uprv_malloc(sizeof(UCMTable)); |
| 880 if(table==NULL) { |
| 881 fprintf(stderr, "ucm error: unable to allocate a UCMTable\n"); |
| 882 exit(U_MEMORY_ALLOCATION_ERROR); |
| 883 } |
| 884 |
| 885 memset(table, 0, sizeof(UCMTable)); |
| 886 return table; |
| 887 } |
| 888 |
| 889 U_CAPI void U_EXPORT2 |
| 890 ucm_closeTable(UCMTable *table) { |
| 891 if(table!=NULL) { |
| 892 uprv_free(table->mappings); |
| 893 uprv_free(table->codePoints); |
| 894 uprv_free(table->bytes); |
| 895 uprv_free(table->reverseMap); |
| 896 uprv_free(table); |
| 897 } |
| 898 } |
| 899 |
| 900 U_CAPI void U_EXPORT2 |
| 901 ucm_resetTable(UCMTable *table) { |
| 902 if(table!=NULL) { |
| 903 table->mappingsLength=0; |
| 904 table->flagsType=0; |
| 905 table->unicodeMask=0; |
| 906 table->bytesLength=table->codePointsLength=0; |
| 907 table->isSorted=FALSE; |
| 908 } |
| 909 } |
| 910 |
| 911 U_CAPI void U_EXPORT2 |
| 912 ucm_addMapping(UCMTable *table, |
| 913 UCMapping *m, |
| 914 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], |
| 915 uint8_t bytes[UCNV_EXT_MAX_BYTES]) { |
| 916 UCMapping *tm; |
| 917 UChar32 c; |
| 918 int32_t idx; |
| 919 |
| 920 if(table->mappingsLength>=table->mappingsCapacity) { |
| 921 /* make the mappings array larger */ |
| 922 if(table->mappingsCapacity==0) { |
| 923 table->mappingsCapacity=1000; |
| 924 } else { |
| 925 table->mappingsCapacity*=10; |
| 926 } |
| 927 table->mappings=(UCMapping *)uprv_realloc(table->mappings, |
| 928 table->mappingsCapacity*sizeof(UCMa
pping)); |
| 929 if(table->mappings==NULL) { |
| 930 fprintf(stderr, "ucm error: unable to allocate %d UCMappings\n", |
| 931 (int)table->mappingsCapacity); |
| 932 exit(U_MEMORY_ALLOCATION_ERROR); |
| 933 } |
| 934 |
| 935 if(table->reverseMap!=NULL) { |
| 936 /* the reverseMap must be reallocated in a new sort */ |
| 937 uprv_free(table->reverseMap); |
| 938 table->reverseMap=NULL; |
| 939 } |
| 940 } |
| 941 |
| 942 if(m->uLen>1 && table->codePointsCapacity==0) { |
| 943 table->codePointsCapacity=10000; |
| 944 table->codePoints=(UChar32 *)uprv_malloc(table->codePointsCapacity*4); |
| 945 if(table->codePoints==NULL) { |
| 946 fprintf(stderr, "ucm error: unable to allocate %d UChar32s\n", |
| 947 (int)table->codePointsCapacity); |
| 948 exit(U_MEMORY_ALLOCATION_ERROR); |
| 949 } |
| 950 } |
| 951 |
| 952 if(m->bLen>4 && table->bytesCapacity==0) { |
| 953 table->bytesCapacity=10000; |
| 954 table->bytes=(uint8_t *)uprv_malloc(table->bytesCapacity); |
| 955 if(table->bytes==NULL) { |
| 956 fprintf(stderr, "ucm error: unable to allocate %d bytes\n", |
| 957 (int)table->bytesCapacity); |
| 958 exit(U_MEMORY_ALLOCATION_ERROR); |
| 959 } |
| 960 } |
| 961 |
| 962 if(m->uLen>1) { |
| 963 idx=table->codePointsLength; |
| 964 table->codePointsLength+=m->uLen; |
| 965 if(table->codePointsLength>table->codePointsCapacity) { |
| 966 fprintf(stderr, "ucm error: too many code points in multiple-code po
int mappings\n"); |
| 967 exit(U_MEMORY_ALLOCATION_ERROR); |
| 968 } |
| 969 |
| 970 uprv_memcpy(table->codePoints+idx, codePoints, m->uLen*4); |
| 971 m->u=idx; |
| 972 } |
| 973 |
| 974 if(m->bLen>4) { |
| 975 idx=table->bytesLength; |
| 976 table->bytesLength+=m->bLen; |
| 977 if(table->bytesLength>table->bytesCapacity) { |
| 978 fprintf(stderr, "ucm error: too many bytes in mappings with >4 chars
et bytes\n"); |
| 979 exit(U_MEMORY_ALLOCATION_ERROR); |
| 980 } |
| 981 |
| 982 uprv_memcpy(table->bytes+idx, bytes, m->bLen); |
| 983 m->b.idx=idx; |
| 984 } |
| 985 |
| 986 /* set unicodeMask */ |
| 987 for(idx=0; idx<m->uLen; ++idx) { |
| 988 c=codePoints[idx]; |
| 989 if(c>=0x10000) { |
| 990 table->unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementa
ry code points */ |
| 991 } else if(U_IS_SURROGATE(c)) { |
| 992 table->unicodeMask|=UCNV_HAS_SURROGATES; /* there are surrogate c
ode points */ |
| 993 } |
| 994 } |
| 995 |
| 996 /* set flagsType */ |
| 997 if(m->f<0) { |
| 998 table->flagsType|=UCM_FLAGS_IMPLICIT; |
| 999 } else { |
| 1000 table->flagsType|=UCM_FLAGS_EXPLICIT; |
| 1001 } |
| 1002 |
| 1003 tm=table->mappings+table->mappingsLength++; |
| 1004 uprv_memcpy(tm, m, sizeof(UCMapping)); |
| 1005 |
| 1006 table->isSorted=FALSE; |
| 1007 } |
| 1008 |
| 1009 U_CAPI UCMFile * U_EXPORT2 |
| 1010 ucm_open() { |
| 1011 UCMFile *ucm=(UCMFile *)uprv_malloc(sizeof(UCMFile)); |
| 1012 if(ucm==NULL) { |
| 1013 fprintf(stderr, "ucm error: unable to allocate a UCMFile\n"); |
| 1014 exit(U_MEMORY_ALLOCATION_ERROR); |
| 1015 } |
| 1016 |
| 1017 memset(ucm, 0, sizeof(UCMFile)); |
| 1018 |
| 1019 ucm->base=ucm_openTable(); |
| 1020 ucm->ext=ucm_openTable(); |
| 1021 |
| 1022 ucm->states.stateFlags[0]=MBCS_STATE_FLAG_DIRECT; |
| 1023 ucm->states.conversionType=UCNV_UNSUPPORTED_CONVERTER; |
| 1024 ucm->states.outputType=-1; |
| 1025 ucm->states.minCharLength=ucm->states.maxCharLength=1; |
| 1026 |
| 1027 return ucm; |
| 1028 } |
| 1029 |
| 1030 U_CAPI void U_EXPORT2 |
| 1031 ucm_close(UCMFile *ucm) { |
| 1032 if(ucm!=NULL) { |
| 1033 uprv_free(ucm->base); |
| 1034 uprv_free(ucm->ext); |
| 1035 uprv_free(ucm); |
| 1036 } |
| 1037 } |
| 1038 |
| 1039 U_CAPI int32_t U_EXPORT2 |
| 1040 ucm_mappingType(UCMStates *baseStates, |
| 1041 UCMapping *m, |
| 1042 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], |
| 1043 uint8_t bytes[UCNV_EXT_MAX_BYTES]) { |
| 1044 /* check validity of the bytes and count the characters in them */ |
| 1045 int32_t count=ucm_countChars(baseStates, bytes, m->bLen); |
| 1046 if(count<1) { |
| 1047 /* illegal byte sequence */ |
| 1048 return -1; |
| 1049 } |
| 1050 |
| 1051 /* |
| 1052 * Suitable for an ICU conversion base table means: |
| 1053 * - a 1:1 mapping (1 Unicode code point : 1 byte sequence) |
| 1054 * - SBCS: any 1:1 mapping |
| 1055 * (the table stores additional bits to distinguish mapping types) |
| 1056 * - MBCS: not a |2 SUB mapping for <subchar1> |
| 1057 * - MBCS: not a |1 fallback to 0x00 |
| 1058 * - MBCS: not a multi-byte mapping with leading 0x00 bytes |
| 1059 * |
| 1060 * Further restrictions for fromUnicode tables |
| 1061 * are enforced in makeconv (MBCSOkForBaseFromUnicode()). |
| 1062 * |
| 1063 * All of the MBCS fromUnicode specific tests could be removed from here, |
| 1064 * but the ones above are for unusual mappings, and removing the tests |
| 1065 * from here would change canonucm output which seems gratuitous. |
| 1066 * (Markus Scherer 2006-nov-28) |
| 1067 * |
| 1068 * Exception: All implicit mappings (f<0) that need to be moved |
| 1069 * because of fromUnicode restrictions _must_ be moved here because |
| 1070 * makeconv uses a hack for moving mappings only for the fromUnicode table |
| 1071 * that only works with non-negative values of f. |
| 1072 */ |
| 1073 if( m->uLen==1 && count==1 && |
| 1074 (baseStates->maxCharLength==1 || |
| 1075 !((m->f==2 && m->bLen==1) || |
| 1076 (m->f==1 && bytes[0]==0) || |
| 1077 (m->f<=1 && m->bLen>1 && bytes[0]==0))) |
| 1078 ) { |
| 1079 return 0; /* suitable for a base table */ |
| 1080 } else { |
| 1081 return 1; /* needs to go into an extension table */ |
| 1082 } |
| 1083 } |
| 1084 |
| 1085 U_CAPI UBool U_EXPORT2 |
| 1086 ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates, |
| 1087 UCMapping *m, |
| 1088 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], |
| 1089 uint8_t bytes[UCNV_EXT_MAX_BYTES]) { |
| 1090 int32_t type; |
| 1091 |
| 1092 if(m->f==2 && m->uLen>1) { |
| 1093 fprintf(stderr, "ucm error: illegal <subchar1> |2 mapping from multiple
code points\n"); |
| 1094 printMapping(m, codePoints, bytes, stderr); |
| 1095 return FALSE; |
| 1096 } |
| 1097 |
| 1098 if(baseStates!=NULL) { |
| 1099 /* check validity of the bytes and count the characters in them */ |
| 1100 type=ucm_mappingType(baseStates, m, codePoints, bytes); |
| 1101 if(type<0) { |
| 1102 /* illegal byte sequence */ |
| 1103 printMapping(m, codePoints, bytes, stderr); |
| 1104 return FALSE; |
| 1105 } |
| 1106 } else { |
| 1107 /* not used - adding a mapping for an extension-only table before its ba
se table is read */ |
| 1108 type=1; |
| 1109 } |
| 1110 |
| 1111 /* |
| 1112 * Add the mapping to the base table if this is requested and suitable. |
| 1113 * Otherwise, add it to the extension table. |
| 1114 */ |
| 1115 if(forBase && type==0) { |
| 1116 ucm_addMapping(ucm->base, m, codePoints, bytes); |
| 1117 } else { |
| 1118 ucm_addMapping(ucm->ext, m, codePoints, bytes); |
| 1119 } |
| 1120 |
| 1121 return TRUE; |
| 1122 } |
| 1123 |
| 1124 U_CAPI UBool U_EXPORT2 |
| 1125 ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates
*baseStates) { |
| 1126 UCMapping m={ 0 }; |
| 1127 UChar32 codePoints[UCNV_EXT_MAX_UCHARS]; |
| 1128 uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
| 1129 |
| 1130 const char *s; |
| 1131 |
| 1132 /* ignore empty and comment lines */ |
| 1133 if(line[0]=='#' || *(s=u_skipWhitespace(line))==0 || *s=='\n' || *s=='\r') { |
| 1134 return TRUE; |
| 1135 } |
| 1136 |
| 1137 return |
| 1138 ucm_parseMappingLine(&m, codePoints, bytes, line) && |
| 1139 ucm_addMappingAuto(ucm, forBase, baseStates, &m, codePoints, bytes); |
| 1140 } |
| 1141 |
| 1142 U_CAPI void U_EXPORT2 |
| 1143 ucm_readTable(UCMFile *ucm, FileStream* convFile, |
| 1144 UBool forBase, UCMStates *baseStates, |
| 1145 UErrorCode *pErrorCode) { |
| 1146 char line[500]; |
| 1147 char *end; |
| 1148 UBool isOK; |
| 1149 |
| 1150 if(U_FAILURE(*pErrorCode)) { |
| 1151 return; |
| 1152 } |
| 1153 |
| 1154 isOK=TRUE; |
| 1155 |
| 1156 for(;;) { |
| 1157 /* read the next line */ |
| 1158 if(!T_FileStream_readLine(convFile, line, sizeof(line))) { |
| 1159 fprintf(stderr, "incomplete charmap section\n"); |
| 1160 isOK=FALSE; |
| 1161 break; |
| 1162 } |
| 1163 |
| 1164 /* remove CR LF */ |
| 1165 end=uprv_strchr(line, 0); |
| 1166 while(line<end && (*(end-1)=='\r' || *(end-1)=='\n')) { |
| 1167 --end; |
| 1168 } |
| 1169 *end=0; |
| 1170 |
| 1171 /* ignore empty and comment lines */ |
| 1172 if(line[0]==0 || line[0]=='#') { |
| 1173 continue; |
| 1174 } |
| 1175 |
| 1176 /* stop at the end of the mapping table */ |
| 1177 if(0==uprv_strcmp(line, "END CHARMAP")) { |
| 1178 break; |
| 1179 } |
| 1180 |
| 1181 isOK&=ucm_addMappingFromLine(ucm, line, forBase, baseStates); |
| 1182 } |
| 1183 |
| 1184 if(!isOK) { |
| 1185 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 1186 } |
| 1187 } |
| 1188 #endif |
OLD | NEW |