OLD | NEW |
(Empty) | |
| 1 /* |
| 2 *******************************************************************************
* |
| 3 * |
| 4 * Copyright (C) 1998-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 *******************************************************************************
* |
| 8 * |
| 9 * |
| 10 * makeconv.c: |
| 11 * tool creating a binary (compressed) representation of the conversion mapping |
| 12 * table (IBM NLTC ucmap format). |
| 13 * |
| 14 * 05/04/2000 helena Added fallback mapping into the picture... |
| 15 * 06/29/2000 helena Major rewrite of the callback APIs. |
| 16 */ |
| 17 |
| 18 #include <stdio.h> |
| 19 #include "unicode/putil.h" |
| 20 #include "unicode/ucnv_err.h" |
| 21 #include "ucnv_bld.h" |
| 22 #include "ucnv_imp.h" |
| 23 #include "ucnv_cnv.h" |
| 24 #include "cstring.h" |
| 25 #include "cmemory.h" |
| 26 #include "uinvchar.h" |
| 27 #include "filestrm.h" |
| 28 #include "toolutil.h" |
| 29 #include "uoptions.h" |
| 30 #include "unicode/udata.h" |
| 31 #include "unewdata.h" |
| 32 #include "uparse.h" |
| 33 #include "ucm.h" |
| 34 #include "makeconv.h" |
| 35 #include "genmbcs.h" |
| 36 |
| 37 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) |
| 38 |
| 39 #define DEBUG 0 |
| 40 |
| 41 typedef struct ConvData { |
| 42 UCMFile *ucm; |
| 43 NewConverter *cnvData, *extData; |
| 44 UConverterSharedData sharedData; |
| 45 UConverterStaticData staticData; |
| 46 } ConvData; |
| 47 |
| 48 static void |
| 49 initConvData(ConvData *data) { |
| 50 uprv_memset(data, 0, sizeof(ConvData)); |
| 51 data->sharedData.structSize=sizeof(UConverterSharedData); |
| 52 data->staticData.structSize=sizeof(UConverterStaticData); |
| 53 data->sharedData.staticData=&data->staticData; |
| 54 } |
| 55 |
| 56 static void |
| 57 cleanupConvData(ConvData *data) { |
| 58 if(data!=NULL) { |
| 59 if(data->cnvData!=NULL) { |
| 60 data->cnvData->close(data->cnvData); |
| 61 data->cnvData=NULL; |
| 62 } |
| 63 if(data->extData!=NULL) { |
| 64 data->extData->close(data->extData); |
| 65 data->extData=NULL; |
| 66 } |
| 67 ucm_close(data->ucm); |
| 68 data->ucm=NULL; |
| 69 } |
| 70 } |
| 71 |
| 72 /* |
| 73 * from ucnvstat.c - static prototypes of data-based converters |
| 74 */ |
| 75 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPP
ORTED_CONVERTER_TYPES]; |
| 76 |
| 77 /* |
| 78 * Global - verbosity |
| 79 */ |
| 80 UBool VERBOSE = FALSE; |
| 81 UBool SMALL = FALSE; |
| 82 UBool IGNORE_SISO_CHECK = FALSE; |
| 83 |
| 84 static void |
| 85 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCod
e); |
| 86 |
| 87 /* |
| 88 * Set up the UNewData and write the converter.. |
| 89 */ |
| 90 static void |
| 91 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr
orCode *status); |
| 92 |
| 93 UBool haveCopyright=TRUE; |
| 94 |
| 95 static UDataInfo dataInfo={ |
| 96 sizeof(UDataInfo), |
| 97 0, |
| 98 |
| 99 U_IS_BIG_ENDIAN, |
| 100 U_CHARSET_FAMILY, |
| 101 sizeof(UChar), |
| 102 0, |
| 103 |
| 104 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */ |
| 105 {6, 2, 0, 0}, /* formatVersion */ |
| 106 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */ |
| 107 }; |
| 108 |
| 109 static void |
| 110 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr
orCode *status) |
| 111 { |
| 112 UNewDataMemory *mem = NULL; |
| 113 uint32_t sz2; |
| 114 uint32_t size = 0; |
| 115 int32_t tableType; |
| 116 |
| 117 if(U_FAILURE(*status)) |
| 118 { |
| 119 return; |
| 120 } |
| 121 |
| 122 tableType=TABLE_NONE; |
| 123 if(data->cnvData!=NULL) { |
| 124 tableType|=TABLE_BASE; |
| 125 } |
| 126 if(data->extData!=NULL) { |
| 127 tableType|=TABLE_EXT; |
| 128 } |
| 129 |
| 130 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPY
RIGHT_STRING : NULL, status); |
| 131 |
| 132 if(U_FAILURE(*status)) |
| 133 { |
| 134 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", |
| 135 cnvName, |
| 136 "cnv", |
| 137 u_errorName(*status)); |
| 138 return; |
| 139 } |
| 140 |
| 141 if(VERBOSE) |
| 142 { |
| 143 printf("- Opened udata %s.%s\n", cnvName, "cnv"); |
| 144 } |
| 145 |
| 146 |
| 147 /* all read only, clean, platform independent data. Mmmm. :) */ |
| 148 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); |
| 149 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ |
| 150 /* Now, write the table */ |
| 151 if(tableType&TABLE_BASE) { |
| 152 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tabl
eType); |
| 153 } |
| 154 if(tableType&TABLE_EXT) { |
| 155 size += data->extData->write(data->extData, &data->staticData, mem, tabl
eType); |
| 156 } |
| 157 |
| 158 sz2 = udata_finish(mem, status); |
| 159 if(size != sz2) |
| 160 { |
| 161 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u b
ytes\n", (int)sz2, (int)size); |
| 162 *status=U_INTERNAL_PROGRAM_ERROR; |
| 163 } |
| 164 if(VERBOSE) |
| 165 { |
| 166 printf("- Wrote %u bytes to the udata.\n", (int)sz2); |
| 167 } |
| 168 } |
| 169 |
| 170 enum { |
| 171 OPT_HELP_H, |
| 172 OPT_HELP_QUESTION_MARK, |
| 173 OPT_COPYRIGHT, |
| 174 OPT_VERSION, |
| 175 OPT_DESTDIR, |
| 176 OPT_VERBOSE, |
| 177 OPT_SMALL, |
| 178 OPT_IGNORE_SISO_CHECK, |
| 179 OPT_COUNT |
| 180 }; |
| 181 |
| 182 static UOption options[]={ |
| 183 UOPTION_HELP_H, |
| 184 UOPTION_HELP_QUESTION_MARK, |
| 185 UOPTION_COPYRIGHT, |
| 186 UOPTION_VERSION, |
| 187 UOPTION_DESTDIR, |
| 188 UOPTION_VERBOSE, |
| 189 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, |
| 190 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 } |
| 191 }; |
| 192 |
| 193 int main(int argc, char* argv[]) |
| 194 { |
| 195 ConvData data; |
| 196 UErrorCode err = U_ZERO_ERROR, localError; |
| 197 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; |
| 198 const char* destdir, *arg; |
| 199 size_t destdirlen; |
| 200 char* dot = NULL, *outBasename; |
| 201 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; |
| 202 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; |
| 203 UVersionInfo icuVersion; |
| 204 UBool printFilename; |
| 205 |
| 206 err = U_ZERO_ERROR; |
| 207 |
| 208 U_MAIN_INIT_ARGS(argc, argv); |
| 209 |
| 210 /* Set up the ICU version number */ |
| 211 u_getVersion(icuVersion); |
| 212 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); |
| 213 |
| 214 /* preset then read command line options */ |
| 215 options[OPT_DESTDIR].value=u_getDataDirectory(); |
| 216 argc=u_parseArgs(argc, argv, LENGTHOF(options), options); |
| 217 |
| 218 /* error handling, printing usage message */ |
| 219 if(argc<0) { |
| 220 fprintf(stderr, |
| 221 "error in command line argument \"%s\"\n", |
| 222 argv[-argc]); |
| 223 } else if(argc<2) { |
| 224 argc=-1; |
| 225 } |
| 226 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK
].doesOccur) { |
| 227 FILE *stdfile=argc<0 ? stderr : stdout; |
| 228 fprintf(stdfile, |
| 229 "usage: %s [-options] files...\n" |
| 230 "\tread .ucm codepage mapping files and write .cnv files\n" |
| 231 "options:\n" |
| 232 "\t-h or -? or --help this usage text\n" |
| 233 "\t-V or --version show a version message\n" |
| 234 "\t-c or --copyright include a copyright notice\n" |
| 235 "\t-d or --destdir destination directory, followed by the path\n
" |
| 236 "\t-v or --verbose Turn on verbose output\n", |
| 237 argv[0]); |
| 238 fprintf(stdfile, |
| 239 "\t --small Generate smaller .cnv files. They will be\n" |
| 240 "\t significantly smaller but may not be compatib
le with\n" |
| 241 "\t older versions of ICU and will require heap m
emory\n" |
| 242 "\t allocation when loaded.\n" |
| 243 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n
"); |
| 244 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; |
| 245 } |
| 246 |
| 247 if(options[OPT_VERSION].doesOccur) { |
| 248 printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping
files and write .cnv files\n", |
| 249 dataInfo.formatVersion[0], dataInfo.formatVersion[1]); |
| 250 printf("%s\n", U_COPYRIGHT_STRING); |
| 251 exit(0); |
| 252 } |
| 253 |
| 254 /* get the options values */ |
| 255 haveCopyright = options[OPT_COPYRIGHT].doesOccur; |
| 256 destdir = options[OPT_DESTDIR].value; |
| 257 VERBOSE = options[OPT_VERBOSE].doesOccur; |
| 258 SMALL = options[OPT_SMALL].doesOccur; |
| 259 |
| 260 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { |
| 261 IGNORE_SISO_CHECK = TRUE; |
| 262 } |
| 263 |
| 264 if (destdir != NULL && *destdir != 0) { |
| 265 uprv_strcpy(outFileName, destdir); |
| 266 destdirlen = uprv_strlen(destdir); |
| 267 outBasename = outFileName + destdirlen; |
| 268 if (*(outBasename - 1) != U_FILE_SEP_CHAR) { |
| 269 *outBasename++ = U_FILE_SEP_CHAR; |
| 270 ++destdirlen; |
| 271 } |
| 272 } else { |
| 273 destdirlen = 0; |
| 274 outBasename = outFileName; |
| 275 } |
| 276 |
| 277 #if DEBUG |
| 278 { |
| 279 int i; |
| 280 printf("makeconv: processing %d files...\n", argc - 1); |
| 281 for(i=1; i<argc; ++i) { |
| 282 printf("%s ", argv[i]); |
| 283 } |
| 284 printf("\n"); |
| 285 fflush(stdout); |
| 286 } |
| 287 #endif |
| 288 |
| 289 err = U_ZERO_ERROR; |
| 290 printFilename = (UBool) (argc > 2 || VERBOSE); |
| 291 for (++argv; --argc; ++argv) |
| 292 { |
| 293 arg = getLongPathname(*argv); |
| 294 |
| 295 /* Check for potential buffer overflow */ |
| 296 if(strlen(arg) > UCNV_MAX_FULL_FILE_NAME_LENGTH) |
| 297 { |
| 298 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); |
| 299 return U_BUFFER_OVERFLOW_ERROR; |
| 300 } |
| 301 |
| 302 /*produces the right destination path for display*/ |
| 303 if (destdirlen != 0) |
| 304 { |
| 305 const char *basename; |
| 306 |
| 307 /* find the last file sepator */ |
| 308 basename = findBasename(arg); |
| 309 uprv_strcpy(outBasename, basename); |
| 310 } |
| 311 else |
| 312 { |
| 313 uprv_strcpy(outFileName, arg); |
| 314 } |
| 315 |
| 316 /*removes the extension if any is found*/ |
| 317 dot = uprv_strrchr(outBasename, '.'); |
| 318 if (dot) |
| 319 { |
| 320 *dot = '\0'; |
| 321 } |
| 322 |
| 323 /* the basename without extension is the converter name */ |
| 324 uprv_strcpy(cnvName, outBasename); |
| 325 |
| 326 /*Adds the target extension*/ |
| 327 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); |
| 328 |
| 329 #if DEBUG |
| 330 printf("makeconv: processing %s ...\n", arg); |
| 331 fflush(stdout); |
| 332 #endif |
| 333 localError = U_ZERO_ERROR; |
| 334 initConvData(&data); |
| 335 createConverter(&data, arg, &localError); |
| 336 |
| 337 if (U_FAILURE(localError)) |
| 338 { |
| 339 /* if an error is found, print out an error msg and keep going */ |
| 340 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\"
(%s)\n", outFileName, arg, |
| 341 u_errorName(localError)); |
| 342 if(U_SUCCESS(err)) { |
| 343 err = localError; |
| 344 } |
| 345 } |
| 346 else |
| 347 { |
| 348 /* Insure the static data name matches the file name */ |
| 349 /* Changed to ignore directory and only compare base name |
| 350 LDH 1/2/08*/ |
| 351 char *p; |
| 352 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator *
/ |
| 353 |
| 354 if(p == NULL) /* OK, try alternate */ |
| 355 { |
| 356 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); |
| 357 if(p == NULL) |
| 358 { |
| 359 p=cnvName; /* If no separators, no problem */ |
| 360 } |
| 361 } |
| 362 else |
| 363 { |
| 364 p++; /* If found separtor, don't include it in compare */ |
| 365 } |
| 366 if(uprv_stricmp(p,data.staticData.name)) |
| 367 { |
| 368 fprintf(stderr, "Warning: %s%s claims to be '%s'\n", |
| 369 cnvName, CONVERTER_FILE_EXTENSION, |
| 370 data.staticData.name); |
| 371 } |
| 372 |
| 373 uprv_strcpy((char*)data.staticData.name, cnvName); |
| 374 |
| 375 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { |
| 376 fprintf(stderr, |
| 377 "Error: A converter name must contain only invariant charact
ers.\n" |
| 378 "%s is not a valid converter name.\n", |
| 379 data.staticData.name); |
| 380 if(U_SUCCESS(err)) { |
| 381 err = U_INVALID_TABLE_FORMAT; |
| 382 } |
| 383 } |
| 384 |
| 385 uprv_strcpy(cnvNameWithPkg, cnvName); |
| 386 |
| 387 localError = U_ZERO_ERROR; |
| 388 writeConverterData(&data, cnvNameWithPkg, destdir, &localError); |
| 389 |
| 390 if(U_FAILURE(localError)) |
| 391 { |
| 392 /* if an error is found, print out an error msg and keep going*/ |
| 393 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", o
utFileName, arg, |
| 394 u_errorName(localError)); |
| 395 if(U_SUCCESS(err)) { |
| 396 err = localError; |
| 397 } |
| 398 } |
| 399 else if (printFilename) |
| 400 { |
| 401 puts(outBasename); |
| 402 } |
| 403 } |
| 404 fflush(stdout); |
| 405 fflush(stderr); |
| 406 |
| 407 cleanupConvData(&data); |
| 408 } |
| 409 |
| 410 return err; |
| 411 } |
| 412 |
| 413 static void |
| 414 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID
) { |
| 415 if( (name[0]=='i' || name[0]=='I') && |
| 416 (name[1]=='b' || name[1]=='B') && |
| 417 (name[2]=='m' || name[2]=='M') |
| 418 ) { |
| 419 name+=3; |
| 420 if(*name=='-') { |
| 421 ++name; |
| 422 } |
| 423 *pPlatform=UCNV_IBM; |
| 424 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); |
| 425 } else { |
| 426 *pPlatform=UCNV_UNKNOWN; |
| 427 *pCCSID=0; |
| 428 } |
| 429 } |
| 430 |
| 431 static void |
| 432 readHeader(ConvData *data, |
| 433 FileStream* convFile, |
| 434 const char* converterName, |
| 435 UErrorCode *pErrorCode) { |
| 436 char line[200]; |
| 437 char *s, *key, *value; |
| 438 const UConverterStaticData *prototype; |
| 439 UConverterStaticData *staticData; |
| 440 |
| 441 if(U_FAILURE(*pErrorCode)) { |
| 442 return; |
| 443 } |
| 444 |
| 445 staticData=&data->staticData; |
| 446 staticData->platform=UCNV_IBM; |
| 447 staticData->subCharLen=0; |
| 448 |
| 449 while(T_FileStream_readLine(convFile, line, sizeof(line))) { |
| 450 /* basic parsing and handling of state-related items */ |
| 451 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { |
| 452 continue; |
| 453 } |
| 454 |
| 455 /* stop at the beginning of the mapping section */ |
| 456 if(uprv_strcmp(line, "CHARMAP")==0) { |
| 457 break; |
| 458 } |
| 459 |
| 460 /* collect the information from the header field, ignore unknown keys */ |
| 461 if(uprv_strcmp(key, "code_set_name")==0) { |
| 462 if(*value!=0) { |
| 463 uprv_strcpy((char *)staticData->name, value); |
| 464 getPlatformAndCCSIDFromName(value, &staticData->platform, &stati
cData->codepage); |
| 465 } |
| 466 } else if(uprv_strcmp(key, "subchar")==0) { |
| 467 uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
| 468 int8_t length; |
| 469 |
| 470 s=value; |
| 471 length=ucm_parseBytes(bytes, line, (const char **)&s); |
| 472 if(1<=length && length<=4 && *s==0) { |
| 473 staticData->subCharLen=length; |
| 474 uprv_memcpy(staticData->subChar, bytes, length); |
| 475 } else { |
| 476 fprintf(stderr, "error: illegal <subchar> %s\n", value); |
| 477 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 478 return; |
| 479 } |
| 480 } else if(uprv_strcmp(key, "subchar1")==0) { |
| 481 uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
| 482 |
| 483 s=value; |
| 484 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { |
| 485 staticData->subChar1=bytes[0]; |
| 486 } else { |
| 487 fprintf(stderr, "error: illegal <subchar1> %s\n", value); |
| 488 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 489 return; |
| 490 } |
| 491 } |
| 492 } |
| 493 |
| 494 /* copy values from the UCMFile to the static data */ |
| 495 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; |
| 496 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; |
| 497 staticData->conversionType=data->ucm->states.conversionType; |
| 498 |
| 499 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { |
| 500 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); |
| 501 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 502 return; |
| 503 } |
| 504 |
| 505 /* |
| 506 * Now that we know the type, copy any 'default' values from the table. |
| 507 * We need not check the type any further because the parser only |
| 508 * recognizes what we have prototypes for. |
| 509 * |
| 510 * For delta (extension-only) tables, copy values from the base file |
| 511 * instead, see createConverter(). |
| 512 */ |
| 513 if(data->ucm->baseName[0]==0) { |
| 514 prototype=ucnv_converterStaticData[staticData->conversionType]; |
| 515 if(prototype!=NULL) { |
| 516 if(staticData->name[0]==0) { |
| 517 uprv_strcpy((char *)staticData->name, prototype->name); |
| 518 } |
| 519 |
| 520 if(staticData->codepage==0) { |
| 521 staticData->codepage=prototype->codepage; |
| 522 } |
| 523 |
| 524 if(staticData->platform==0) { |
| 525 staticData->platform=prototype->platform; |
| 526 } |
| 527 |
| 528 if(staticData->minBytesPerChar==0) { |
| 529 staticData->minBytesPerChar=prototype->minBytesPerChar; |
| 530 } |
| 531 |
| 532 if(staticData->maxBytesPerChar==0) { |
| 533 staticData->maxBytesPerChar=prototype->maxBytesPerChar; |
| 534 } |
| 535 |
| 536 if(staticData->subCharLen==0) { |
| 537 staticData->subCharLen=prototype->subCharLen; |
| 538 if(prototype->subCharLen>0) { |
| 539 uprv_memcpy(staticData->subChar, prototype->subChar, prototy
pe->subCharLen); |
| 540 } |
| 541 } |
| 542 } |
| 543 } |
| 544 |
| 545 if(data->ucm->states.outputType<0) { |
| 546 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; |
| 547 } |
| 548 |
| 549 if( staticData->subChar1!=0 && |
| 550 (staticData->minBytesPerChar>1 || |
| 551 (staticData->conversionType!=UCNV_MBCS && |
| 552 staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) |
| 553 ) { |
| 554 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or
EBCDIC_STATEFUL\n"); |
| 555 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 556 } |
| 557 } |
| 558 |
| 559 /* return TRUE if a base table was read, FALSE for an extension table */ |
| 560 static UBool |
| 561 readFile(ConvData *data, const char* converterName, |
| 562 UErrorCode *pErrorCode) { |
| 563 char line[200]; |
| 564 char *end; |
| 565 FileStream *convFile; |
| 566 |
| 567 UCMStates *baseStates; |
| 568 UBool dataIsBase; |
| 569 |
| 570 if(U_FAILURE(*pErrorCode)) { |
| 571 return FALSE; |
| 572 } |
| 573 |
| 574 data->ucm=ucm_open(); |
| 575 |
| 576 convFile=T_FileStream_open(converterName, "r"); |
| 577 if(convFile==NULL) { |
| 578 *pErrorCode=U_FILE_ACCESS_ERROR; |
| 579 return FALSE; |
| 580 } |
| 581 |
| 582 readHeader(data, convFile, converterName, pErrorCode); |
| 583 if(U_FAILURE(*pErrorCode)) { |
| 584 return FALSE; |
| 585 } |
| 586 |
| 587 if(data->ucm->baseName[0]==0) { |
| 588 dataIsBase=TRUE; |
| 589 baseStates=&data->ucm->states; |
| 590 ucm_processStates(baseStates, IGNORE_SISO_CHECK); |
| 591 } else { |
| 592 dataIsBase=FALSE; |
| 593 baseStates=NULL; |
| 594 } |
| 595 |
| 596 /* read the base table */ |
| 597 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); |
| 598 if(U_FAILURE(*pErrorCode)) { |
| 599 return FALSE; |
| 600 } |
| 601 |
| 602 /* read an extension table if there is one */ |
| 603 while(T_FileStream_readLine(convFile, line, sizeof(line))) { |
| 604 end=uprv_strchr(line, 0); |
| 605 while(line<end && |
| 606 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\
t')) { |
| 607 --end; |
| 608 } |
| 609 *end=0; |
| 610 |
| 611 if(line[0]=='#' || u_skipWhitespace(line)==end) { |
| 612 continue; /* ignore empty and comment lines */ |
| 613 } |
| 614 |
| 615 if(0==uprv_strcmp(line, "CHARMAP")) { |
| 616 /* read the extension table */ |
| 617 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode); |
| 618 } else { |
| 619 fprintf(stderr, "unexpected text after the base mapping table\n"); |
| 620 } |
| 621 break; |
| 622 } |
| 623 |
| 624 T_FileStream_close(convFile); |
| 625 |
| 626 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType=
=UCM_FLAGS_MIXED) { |
| 627 fprintf(stderr, "error: some entries have the mapping precision (with '|
'), some do not\n"); |
| 628 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 629 } |
| 630 |
| 631 return dataIsBase; |
| 632 } |
| 633 |
| 634 static void |
| 635 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod
e) { |
| 636 ConvData baseData; |
| 637 UBool dataIsBase; |
| 638 |
| 639 UConverterStaticData *staticData; |
| 640 UCMStates *states, *baseStates; |
| 641 |
| 642 if(U_FAILURE(*pErrorCode)) { |
| 643 return; |
| 644 } |
| 645 |
| 646 initConvData(data); |
| 647 |
| 648 dataIsBase=readFile(data, converterName, pErrorCode); |
| 649 if(U_FAILURE(*pErrorCode)) { |
| 650 return; |
| 651 } |
| 652 |
| 653 staticData=&data->staticData; |
| 654 states=&data->ucm->states; |
| 655 |
| 656 if(dataIsBase) { |
| 657 /* |
| 658 * Build a normal .cnv file with a base table |
| 659 * and an optional extension table. |
| 660 */ |
| 661 data->cnvData=MBCSOpen(data->ucm); |
| 662 if(data->cnvData==NULL) { |
| 663 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 664 |
| 665 } else if(!data->cnvData->isValid(data->cnvData, |
| 666 staticData->subChar, staticData->subCharLen) |
| 667 ) { |
| 668 fprintf(stderr, " the substitution character byte sequence is
illegal in this codepage structure!\n"); |
| 669 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 670 |
| 671 } else if(staticData->subChar1!=0 && |
| 672 !data->cnvData->isValid(data->cnvData, &staticData->subChar1
, 1) |
| 673 ) { |
| 674 fprintf(stderr, " the subchar1 byte is illegal in this codepag
e structure!\n"); |
| 675 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 676 |
| 677 } else if( |
| 678 data->ucm->ext->mappingsLength>0 && |
| 679 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm
->ext, FALSE) |
| 680 ) { |
| 681 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 682 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { |
| 683 /* sort the table so that it can be turned into UTF-8-friendly data
*/ |
| 684 ucm_sortTable(data->ucm->base); |
| 685 } |
| 686 |
| 687 if(U_SUCCESS(*pErrorCode)) { |
| 688 if( |
| 689 /* add the base table after ucm_checkBaseExt()! */ |
| 690 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->
staticData) |
| 691 ) { |
| 692 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 693 } else { |
| 694 /* |
| 695 * addTable() may have requested moving more mappings to the ext
ension table |
| 696 * if they fit into the base toUnicode table but not into the |
| 697 * base fromUnicode table. |
| 698 * (Especially for UTF-8-friendly fromUnicode tables.) |
| 699 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which c
auses them |
| 700 * to be excluded from the extension toUnicode data. |
| 701 * See MBCSOkForBaseFromUnicode() for which mappings do not fit
into |
| 702 * the base fromUnicode table. |
| 703 */ |
| 704 ucm_moveMappings(data->ucm->base, data->ucm->ext); |
| 705 ucm_sortTable(data->ucm->ext); |
| 706 if(data->ucm->ext->mappingsLength>0) { |
| 707 /* prepare the extension table, if there is one */ |
| 708 data->extData=CnvExtOpen(data->ucm); |
| 709 if(data->extData==NULL) { |
| 710 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 711 } else if( |
| 712 !data->extData->addTable(data->extData, data->ucm->ext,
&data->staticData) |
| 713 ) { |
| 714 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 715 } |
| 716 } |
| 717 } |
| 718 } |
| 719 } else { |
| 720 /* Build an extension-only .cnv file. */ |
| 721 char baseFilename[500]; |
| 722 char *basename; |
| 723 |
| 724 initConvData(&baseData); |
| 725 |
| 726 /* assemble a path/filename for data->ucm->baseName */ |
| 727 uprv_strcpy(baseFilename, converterName); |
| 728 basename=(char *)findBasename(baseFilename); |
| 729 uprv_strcpy(basename, data->ucm->baseName); |
| 730 uprv_strcat(basename, ".ucm"); |
| 731 |
| 732 /* read the base table */ |
| 733 dataIsBase=readFile(&baseData, baseFilename, pErrorCode); |
| 734 if(U_FAILURE(*pErrorCode)) { |
| 735 return; |
| 736 } else if(!dataIsBase) { |
| 737 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base tab
le file\n", baseFilename); |
| 738 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 739 } else { |
| 740 /* prepare the extension table */ |
| 741 data->extData=CnvExtOpen(data->ucm); |
| 742 if(data->extData==NULL) { |
| 743 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 744 } else { |
| 745 /* fill in gaps in extension file header fields */ |
| 746 UCMapping *m, *mLimit; |
| 747 uint8_t fallbackFlags; |
| 748 |
| 749 baseStates=&baseData.ucm->states; |
| 750 if(states->conversionType==UCNV_DBCS) { |
| 751 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2
); |
| 752 } else if(states->minCharLength==0) { |
| 753 staticData->minBytesPerChar=(int8_t)(states->minCharLength=b
aseStates->minCharLength); |
| 754 } |
| 755 if(states->maxCharLength<states->minCharLength) { |
| 756 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=b
aseStates->maxCharLength); |
| 757 } |
| 758 |
| 759 if(staticData->subCharLen==0) { |
| 760 uprv_memcpy(staticData->subChar, baseData.staticData.subChar
, 4); |
| 761 staticData->subCharLen=baseData.staticData.subCharLen; |
| 762 } |
| 763 /* |
| 764 * do not copy subChar1 - |
| 765 * only use what is explicitly specified |
| 766 * because it cannot be unset in the extension file header |
| 767 */ |
| 768 |
| 769 /* get the fallback flags */ |
| 770 fallbackFlags=0; |
| 771 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base-
>mappingsLength; |
| 772 m<mLimit && fallbackFlags!=3; |
| 773 ++m |
| 774 ) { |
| 775 if(m->f==1) { |
| 776 fallbackFlags|=1; |
| 777 } else if(m->f==3) { |
| 778 fallbackFlags|=2; |
| 779 } |
| 780 } |
| 781 |
| 782 if(fallbackFlags&1) { |
| 783 staticData->hasFromUnicodeFallback=TRUE; |
| 784 } |
| 785 if(fallbackFlags&2) { |
| 786 staticData->hasToUnicodeFallback=TRUE; |
| 787 } |
| 788 |
| 789 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData
->subCharLen)) { |
| 790 fprintf(stderr, " the substitution character byte sequ
ence is illegal in this codepage structure!\n"); |
| 791 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 792 |
| 793 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseState
s, &staticData->subChar1, 1)) { |
| 794 fprintf(stderr, " the subchar1 byte is illegal in this
codepage structure!\n"); |
| 795 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 796 |
| 797 } else if( |
| 798 !ucm_checkValidity(data->ucm->ext, baseStates) || |
| 799 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm-
>ext, data->ucm->ext, FALSE) |
| 800 ) { |
| 801 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 802 } else { |
| 803 if(states->maxCharLength>1) { |
| 804 /* |
| 805 * When building a normal .cnv file with a base table |
| 806 * for an MBCS (not SBCS) table with explicit precision
flags, |
| 807 * the MBCSAddTable() function marks some mappings for m
oving |
| 808 * to the extension table. |
| 809 * They fit into the base toUnicode table but not into t
he |
| 810 * base fromUnicode table. |
| 811 * (Note: We do have explicit precision flags because th
ey are |
| 812 * required for extension table generation, and |
| 813 * ucm_checkBaseExt() verified it.) |
| 814 * |
| 815 * We do not call MBCSAddTable() here (we probably could
) |
| 816 * so we need to do the analysis before building the ext
ension table. |
| 817 * We assume that MBCSAddTable() will build a UTF-8-frie
ndly table. |
| 818 * Redundant mappings in the extension table are ok exce
pt they cost some size. |
| 819 * |
| 820 * Do this after ucm_checkBaseExt(). |
| 821 */ |
| 822 const MBCSData *mbcsData=MBCSGetDummy(); |
| 823 int32_t needsMove=0; |
| 824 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.uc
m->base->mappingsLength; |
| 825 m<mLimit; |
| 826 ++m |
| 827 ) { |
| 828 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m
->bLen, m->u, m->f)) { |
| 829 m->f|=MBCS_FROM_U_EXT_FLAG; |
| 830 m->moveFlag=UCM_MOVE_TO_EXT; |
| 831 ++needsMove; |
| 832 } |
| 833 } |
| 834 |
| 835 if(needsMove!=0) { |
| 836 ucm_moveMappings(baseData.ucm->base, data->ucm->ext)
; |
| 837 ucm_sortTable(data->ucm->ext); |
| 838 } |
| 839 } |
| 840 if(!data->extData->addTable(data->extData, data->ucm->ext, &
data->staticData)) { |
| 841 *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 842 } |
| 843 } |
| 844 } |
| 845 } |
| 846 |
| 847 cleanupConvData(&baseData); |
| 848 } |
| 849 } |
| 850 |
| 851 /* |
| 852 * Hey, Emacs, please set the following: |
| 853 * |
| 854 * Local Variables: |
| 855 * indent-tabs-mode: nil |
| 856 * End: |
| 857 * |
| 858 */ |
OLD | NEW |