| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 *******************************************************************************
* | |
| 3 * | |
| 4 * Copyright (C) 1998-2014, International Business Machines | |
| 5 * Corporation and others. All Rights Reserved. | |
| 6 * | |
| 7 *******************************************************************************
* | |
| 8 * | |
| 9 * | |
| 10 * makeconv.c: | |
| 11 * tool creating a binary (compressed) representation of the conversion mapping | |
| 12 * table (IBM NLTC ucmap format). | |
| 13 * | |
| 14 * 05/04/2000 helena Added fallback mapping into the picture... | |
| 15 * 06/29/2000 helena Major rewrite of the callback APIs. | |
| 16 */ | |
| 17 | |
| 18 #include <stdio.h> | |
| 19 #include "unicode/putil.h" | |
| 20 #include "unicode/ucnv_err.h" | |
| 21 #include "ucnv_bld.h" | |
| 22 #include "ucnv_imp.h" | |
| 23 #include "ucnv_cnv.h" | |
| 24 #include "cstring.h" | |
| 25 #include "cmemory.h" | |
| 26 #include "uinvchar.h" | |
| 27 #include "filestrm.h" | |
| 28 #include "toolutil.h" | |
| 29 #include "uoptions.h" | |
| 30 #include "unicode/udata.h" | |
| 31 #include "unewdata.h" | |
| 32 #include "uparse.h" | |
| 33 #include "ucm.h" | |
| 34 #include "makeconv.h" | |
| 35 #include "genmbcs.h" | |
| 36 | |
| 37 #define DEBUG 0 | |
| 38 | |
| 39 typedef struct ConvData { | |
| 40 UCMFile *ucm; | |
| 41 NewConverter *cnvData, *extData; | |
| 42 UConverterSharedData sharedData; | |
| 43 UConverterStaticData staticData; | |
| 44 } ConvData; | |
| 45 | |
| 46 static void | |
| 47 initConvData(ConvData *data) { | |
| 48 uprv_memset(data, 0, sizeof(ConvData)); | |
| 49 data->sharedData.structSize=sizeof(UConverterSharedData); | |
| 50 data->staticData.structSize=sizeof(UConverterStaticData); | |
| 51 data->sharedData.staticData=&data->staticData; | |
| 52 } | |
| 53 | |
| 54 static void | |
| 55 cleanupConvData(ConvData *data) { | |
| 56 if(data!=NULL) { | |
| 57 if(data->cnvData!=NULL) { | |
| 58 data->cnvData->close(data->cnvData); | |
| 59 data->cnvData=NULL; | |
| 60 } | |
| 61 if(data->extData!=NULL) { | |
| 62 data->extData->close(data->extData); | |
| 63 data->extData=NULL; | |
| 64 } | |
| 65 ucm_close(data->ucm); | |
| 66 data->ucm=NULL; | |
| 67 } | |
| 68 } | |
| 69 | |
| 70 /* | |
| 71 * from ucnvstat.c - static prototypes of data-based converters | |
| 72 */ | |
| 73 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPP
ORTED_CONVERTER_TYPES]; | |
| 74 | |
| 75 /* | |
| 76 * Global - verbosity | |
| 77 */ | |
| 78 UBool VERBOSE = FALSE; | |
| 79 UBool SMALL = FALSE; | |
| 80 UBool IGNORE_SISO_CHECK = FALSE; | |
| 81 | |
| 82 static void | |
| 83 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCod
e); | |
| 84 | |
| 85 /* | |
| 86 * Set up the UNewData and write the converter.. | |
| 87 */ | |
| 88 static void | |
| 89 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr
orCode *status); | |
| 90 | |
| 91 UBool haveCopyright=TRUE; | |
| 92 | |
| 93 static UDataInfo dataInfo={ | |
| 94 sizeof(UDataInfo), | |
| 95 0, | |
| 96 | |
| 97 U_IS_BIG_ENDIAN, | |
| 98 U_CHARSET_FAMILY, | |
| 99 sizeof(UChar), | |
| 100 0, | |
| 101 | |
| 102 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */ | |
| 103 {6, 2, 0, 0}, /* formatVersion */ | |
| 104 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */ | |
| 105 }; | |
| 106 | |
| 107 static void | |
| 108 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr
orCode *status) | |
| 109 { | |
| 110 UNewDataMemory *mem = NULL; | |
| 111 uint32_t sz2; | |
| 112 uint32_t size = 0; | |
| 113 int32_t tableType; | |
| 114 | |
| 115 if(U_FAILURE(*status)) | |
| 116 { | |
| 117 return; | |
| 118 } | |
| 119 | |
| 120 tableType=TABLE_NONE; | |
| 121 if(data->cnvData!=NULL) { | |
| 122 tableType|=TABLE_BASE; | |
| 123 } | |
| 124 if(data->extData!=NULL) { | |
| 125 tableType|=TABLE_EXT; | |
| 126 } | |
| 127 | |
| 128 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPY
RIGHT_STRING : NULL, status); | |
| 129 | |
| 130 if(U_FAILURE(*status)) | |
| 131 { | |
| 132 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", | |
| 133 cnvName, | |
| 134 "cnv", | |
| 135 u_errorName(*status)); | |
| 136 return; | |
| 137 } | |
| 138 | |
| 139 if(VERBOSE) | |
| 140 { | |
| 141 printf("- Opened udata %s.%s\n", cnvName, "cnv"); | |
| 142 } | |
| 143 | |
| 144 | |
| 145 /* all read only, clean, platform independent data. Mmmm. :) */ | |
| 146 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); | |
| 147 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ | |
| 148 /* Now, write the table */ | |
| 149 if(tableType&TABLE_BASE) { | |
| 150 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tabl
eType); | |
| 151 } | |
| 152 if(tableType&TABLE_EXT) { | |
| 153 size += data->extData->write(data->extData, &data->staticData, mem, tabl
eType); | |
| 154 } | |
| 155 | |
| 156 sz2 = udata_finish(mem, status); | |
| 157 if(size != sz2) | |
| 158 { | |
| 159 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u b
ytes\n", (int)sz2, (int)size); | |
| 160 *status=U_INTERNAL_PROGRAM_ERROR; | |
| 161 } | |
| 162 if(VERBOSE) | |
| 163 { | |
| 164 printf("- Wrote %u bytes to the udata.\n", (int)sz2); | |
| 165 } | |
| 166 } | |
| 167 | |
| 168 enum { | |
| 169 OPT_HELP_H, | |
| 170 OPT_HELP_QUESTION_MARK, | |
| 171 OPT_COPYRIGHT, | |
| 172 OPT_VERSION, | |
| 173 OPT_DESTDIR, | |
| 174 OPT_VERBOSE, | |
| 175 OPT_SMALL, | |
| 176 OPT_IGNORE_SISO_CHECK, | |
| 177 OPT_COUNT | |
| 178 }; | |
| 179 | |
| 180 static UOption options[]={ | |
| 181 UOPTION_HELP_H, | |
| 182 UOPTION_HELP_QUESTION_MARK, | |
| 183 UOPTION_COPYRIGHT, | |
| 184 UOPTION_VERSION, | |
| 185 UOPTION_DESTDIR, | |
| 186 UOPTION_VERBOSE, | |
| 187 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, | |
| 188 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 } | |
| 189 }; | |
| 190 | |
| 191 int main(int argc, char* argv[]) | |
| 192 { | |
| 193 ConvData data; | |
| 194 UErrorCode err = U_ZERO_ERROR, localError; | |
| 195 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
| 196 const char* destdir, *arg; | |
| 197 size_t destdirlen; | |
| 198 char* dot = NULL, *outBasename; | |
| 199 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
| 200 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
| 201 UVersionInfo icuVersion; | |
| 202 UBool printFilename; | |
| 203 | |
| 204 err = U_ZERO_ERROR; | |
| 205 | |
| 206 U_MAIN_INIT_ARGS(argc, argv); | |
| 207 | |
| 208 /* Set up the ICU version number */ | |
| 209 u_getVersion(icuVersion); | |
| 210 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); | |
| 211 | |
| 212 /* preset then read command line options */ | |
| 213 options[OPT_DESTDIR].value=u_getDataDirectory(); | |
| 214 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); | |
| 215 | |
| 216 /* error handling, printing usage message */ | |
| 217 if(argc<0) { | |
| 218 fprintf(stderr, | |
| 219 "error in command line argument \"%s\"\n", | |
| 220 argv[-argc]); | |
| 221 } else if(argc<2) { | |
| 222 argc=-1; | |
| 223 } | |
| 224 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK
].doesOccur) { | |
| 225 FILE *stdfile=argc<0 ? stderr : stdout; | |
| 226 fprintf(stdfile, | |
| 227 "usage: %s [-options] files...\n" | |
| 228 "\tread .ucm codepage mapping files and write .cnv files\n" | |
| 229 "options:\n" | |
| 230 "\t-h or -? or --help this usage text\n" | |
| 231 "\t-V or --version show a version message\n" | |
| 232 "\t-c or --copyright include a copyright notice\n" | |
| 233 "\t-d or --destdir destination directory, followed by the path\n
" | |
| 234 "\t-v or --verbose Turn on verbose output\n", | |
| 235 argv[0]); | |
| 236 fprintf(stdfile, | |
| 237 "\t --small Generate smaller .cnv files. They will be\n" | |
| 238 "\t significantly smaller but may not be compatib
le with\n" | |
| 239 "\t older versions of ICU and will require heap m
emory\n" | |
| 240 "\t allocation when loaded.\n" | |
| 241 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n
"); | |
| 242 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; | |
| 243 } | |
| 244 | |
| 245 if(options[OPT_VERSION].doesOccur) { | |
| 246 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping f
iles and write .cnv files\n", | |
| 247 dataInfo.formatVersion[0], dataInfo.formatVersion[1]); | |
| 248 printf("%s\n", U_COPYRIGHT_STRING); | |
| 249 exit(0); | |
| 250 } | |
| 251 | |
| 252 /* get the options values */ | |
| 253 haveCopyright = options[OPT_COPYRIGHT].doesOccur; | |
| 254 destdir = options[OPT_DESTDIR].value; | |
| 255 VERBOSE = options[OPT_VERBOSE].doesOccur; | |
| 256 SMALL = options[OPT_SMALL].doesOccur; | |
| 257 | |
| 258 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { | |
| 259 IGNORE_SISO_CHECK = TRUE; | |
| 260 } | |
| 261 | |
| 262 if (destdir != NULL && *destdir != 0) { | |
| 263 uprv_strcpy(outFileName, destdir); | |
| 264 destdirlen = uprv_strlen(destdir); | |
| 265 outBasename = outFileName + destdirlen; | |
| 266 if (*(outBasename - 1) != U_FILE_SEP_CHAR) { | |
| 267 *outBasename++ = U_FILE_SEP_CHAR; | |
| 268 ++destdirlen; | |
| 269 } | |
| 270 } else { | |
| 271 destdirlen = 0; | |
| 272 outBasename = outFileName; | |
| 273 } | |
| 274 | |
| 275 #if DEBUG | |
| 276 { | |
| 277 int i; | |
| 278 printf("makeconv: processing %d files...\n", argc - 1); | |
| 279 for(i=1; i<argc; ++i) { | |
| 280 printf("%s ", argv[i]); | |
| 281 } | |
| 282 printf("\n"); | |
| 283 fflush(stdout); | |
| 284 } | |
| 285 #endif | |
| 286 | |
| 287 err = U_ZERO_ERROR; | |
| 288 printFilename = (UBool) (argc > 2 || VERBOSE); | |
| 289 for (++argv; --argc; ++argv) | |
| 290 { | |
| 291 arg = getLongPathname(*argv); | |
| 292 | |
| 293 /* Check for potential buffer overflow */ | |
| 294 if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH) | |
| 295 { | |
| 296 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); | |
| 297 return U_BUFFER_OVERFLOW_ERROR; | |
| 298 } | |
| 299 | |
| 300 /*produces the right destination path for display*/ | |
| 301 if (destdirlen != 0) | |
| 302 { | |
| 303 const char *basename; | |
| 304 | |
| 305 /* find the last file sepator */ | |
| 306 basename = findBasename(arg); | |
| 307 uprv_strcpy(outBasename, basename); | |
| 308 } | |
| 309 else | |
| 310 { | |
| 311 uprv_strcpy(outFileName, arg); | |
| 312 } | |
| 313 | |
| 314 /*removes the extension if any is found*/ | |
| 315 dot = uprv_strrchr(outBasename, '.'); | |
| 316 if (dot) | |
| 317 { | |
| 318 *dot = '\0'; | |
| 319 } | |
| 320 | |
| 321 /* the basename without extension is the converter name */ | |
| 322 uprv_strcpy(cnvName, outBasename); | |
| 323 | |
| 324 /*Adds the target extension*/ | |
| 325 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); | |
| 326 | |
| 327 #if DEBUG | |
| 328 printf("makeconv: processing %s ...\n", arg); | |
| 329 fflush(stdout); | |
| 330 #endif | |
| 331 localError = U_ZERO_ERROR; | |
| 332 initConvData(&data); | |
| 333 createConverter(&data, arg, &localError); | |
| 334 | |
| 335 if (U_FAILURE(localError)) | |
| 336 { | |
| 337 /* if an error is found, print out an error msg and keep going */ | |
| 338 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\"
(%s)\n", outFileName, arg, | |
| 339 u_errorName(localError)); | |
| 340 if(U_SUCCESS(err)) { | |
| 341 err = localError; | |
| 342 } | |
| 343 } | |
| 344 else | |
| 345 { | |
| 346 /* Insure the static data name matches the file name */ | |
| 347 /* Changed to ignore directory and only compare base name | |
| 348 LDH 1/2/08*/ | |
| 349 char *p; | |
| 350 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator *
/ | |
| 351 | |
| 352 if(p == NULL) /* OK, try alternate */ | |
| 353 { | |
| 354 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); | |
| 355 if(p == NULL) | |
| 356 { | |
| 357 p=cnvName; /* If no separators, no problem */ | |
| 358 } | |
| 359 } | |
| 360 else | |
| 361 { | |
| 362 p++; /* If found separtor, don't include it in compare */ | |
| 363 } | |
| 364 if(uprv_stricmp(p,data.staticData.name)) | |
| 365 { | |
| 366 fprintf(stderr, "Warning: %s%s claims to be '%s'\n", | |
| 367 cnvName, CONVERTER_FILE_EXTENSION, | |
| 368 data.staticData.name); | |
| 369 } | |
| 370 | |
| 371 uprv_strcpy((char*)data.staticData.name, cnvName); | |
| 372 | |
| 373 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { | |
| 374 fprintf(stderr, | |
| 375 "Error: A converter name must contain only invariant charact
ers.\n" | |
| 376 "%s is not a valid converter name.\n", | |
| 377 data.staticData.name); | |
| 378 if(U_SUCCESS(err)) { | |
| 379 err = U_INVALID_TABLE_FORMAT; | |
| 380 } | |
| 381 } | |
| 382 | |
| 383 uprv_strcpy(cnvNameWithPkg, cnvName); | |
| 384 | |
| 385 localError = U_ZERO_ERROR; | |
| 386 writeConverterData(&data, cnvNameWithPkg, destdir, &localError); | |
| 387 | |
| 388 if(U_FAILURE(localError)) | |
| 389 { | |
| 390 /* if an error is found, print out an error msg and keep going*/ | |
| 391 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", o
utFileName, arg, | |
| 392 u_errorName(localError)); | |
| 393 if(U_SUCCESS(err)) { | |
| 394 err = localError; | |
| 395 } | |
| 396 } | |
| 397 else if (printFilename) | |
| 398 { | |
| 399 puts(outBasename); | |
| 400 } | |
| 401 } | |
| 402 fflush(stdout); | |
| 403 fflush(stderr); | |
| 404 | |
| 405 cleanupConvData(&data); | |
| 406 } | |
| 407 | |
| 408 return err; | |
| 409 } | |
| 410 | |
| 411 static void | |
| 412 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID
) { | |
| 413 if( (name[0]=='i' || name[0]=='I') && | |
| 414 (name[1]=='b' || name[1]=='B') && | |
| 415 (name[2]=='m' || name[2]=='M') | |
| 416 ) { | |
| 417 name+=3; | |
| 418 if(*name=='-') { | |
| 419 ++name; | |
| 420 } | |
| 421 *pPlatform=UCNV_IBM; | |
| 422 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); | |
| 423 } else { | |
| 424 *pPlatform=UCNV_UNKNOWN; | |
| 425 *pCCSID=0; | |
| 426 } | |
| 427 } | |
| 428 | |
| 429 static void | |
| 430 readHeader(ConvData *data, | |
| 431 FileStream* convFile, | |
| 432 const char* converterName, | |
| 433 UErrorCode *pErrorCode) { | |
| 434 char line[1024]; | |
| 435 char *s, *key, *value; | |
| 436 const UConverterStaticData *prototype; | |
| 437 UConverterStaticData *staticData; | |
| 438 | |
| 439 if(U_FAILURE(*pErrorCode)) { | |
| 440 return; | |
| 441 } | |
| 442 | |
| 443 staticData=&data->staticData; | |
| 444 staticData->platform=UCNV_IBM; | |
| 445 staticData->subCharLen=0; | |
| 446 | |
| 447 while(T_FileStream_readLine(convFile, line, sizeof(line))) { | |
| 448 /* basic parsing and handling of state-related items */ | |
| 449 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { | |
| 450 continue; | |
| 451 } | |
| 452 | |
| 453 /* stop at the beginning of the mapping section */ | |
| 454 if(uprv_strcmp(line, "CHARMAP")==0) { | |
| 455 break; | |
| 456 } | |
| 457 | |
| 458 /* collect the information from the header field, ignore unknown keys */ | |
| 459 if(uprv_strcmp(key, "code_set_name")==0) { | |
| 460 if(*value!=0) { | |
| 461 uprv_strcpy((char *)staticData->name, value); | |
| 462 getPlatformAndCCSIDFromName(value, &staticData->platform, &stati
cData->codepage); | |
| 463 } | |
| 464 } else if(uprv_strcmp(key, "subchar")==0) { | |
| 465 uint8_t bytes[UCNV_EXT_MAX_BYTES]; | |
| 466 int8_t length; | |
| 467 | |
| 468 s=value; | |
| 469 length=ucm_parseBytes(bytes, line, (const char **)&s); | |
| 470 if(1<=length && length<=4 && *s==0) { | |
| 471 staticData->subCharLen=length; | |
| 472 uprv_memcpy(staticData->subChar, bytes, length); | |
| 473 } else { | |
| 474 fprintf(stderr, "error: illegal <subchar> %s\n", value); | |
| 475 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 476 return; | |
| 477 } | |
| 478 } else if(uprv_strcmp(key, "subchar1")==0) { | |
| 479 uint8_t bytes[UCNV_EXT_MAX_BYTES]; | |
| 480 | |
| 481 s=value; | |
| 482 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { | |
| 483 staticData->subChar1=bytes[0]; | |
| 484 } else { | |
| 485 fprintf(stderr, "error: illegal <subchar1> %s\n", value); | |
| 486 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 487 return; | |
| 488 } | |
| 489 } | |
| 490 } | |
| 491 | |
| 492 /* copy values from the UCMFile to the static data */ | |
| 493 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; | |
| 494 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; | |
| 495 staticData->conversionType=data->ucm->states.conversionType; | |
| 496 | |
| 497 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { | |
| 498 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); | |
| 499 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 500 return; | |
| 501 } | |
| 502 | |
| 503 /* | |
| 504 * Now that we know the type, copy any 'default' values from the table. | |
| 505 * We need not check the type any further because the parser only | |
| 506 * recognizes what we have prototypes for. | |
| 507 * | |
| 508 * For delta (extension-only) tables, copy values from the base file | |
| 509 * instead, see createConverter(). | |
| 510 */ | |
| 511 if(data->ucm->baseName[0]==0) { | |
| 512 prototype=ucnv_converterStaticData[staticData->conversionType]; | |
| 513 if(prototype!=NULL) { | |
| 514 if(staticData->name[0]==0) { | |
| 515 uprv_strcpy((char *)staticData->name, prototype->name); | |
| 516 } | |
| 517 | |
| 518 if(staticData->codepage==0) { | |
| 519 staticData->codepage=prototype->codepage; | |
| 520 } | |
| 521 | |
| 522 if(staticData->platform==0) { | |
| 523 staticData->platform=prototype->platform; | |
| 524 } | |
| 525 | |
| 526 if(staticData->minBytesPerChar==0) { | |
| 527 staticData->minBytesPerChar=prototype->minBytesPerChar; | |
| 528 } | |
| 529 | |
| 530 if(staticData->maxBytesPerChar==0) { | |
| 531 staticData->maxBytesPerChar=prototype->maxBytesPerChar; | |
| 532 } | |
| 533 | |
| 534 if(staticData->subCharLen==0) { | |
| 535 staticData->subCharLen=prototype->subCharLen; | |
| 536 if(prototype->subCharLen>0) { | |
| 537 uprv_memcpy(staticData->subChar, prototype->subChar, prototy
pe->subCharLen); | |
| 538 } | |
| 539 } | |
| 540 } | |
| 541 } | |
| 542 | |
| 543 if(data->ucm->states.outputType<0) { | |
| 544 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; | |
| 545 } | |
| 546 | |
| 547 if( staticData->subChar1!=0 && | |
| 548 (staticData->minBytesPerChar>1 || | |
| 549 (staticData->conversionType!=UCNV_MBCS && | |
| 550 staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) | |
| 551 ) { | |
| 552 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or
EBCDIC_STATEFUL\n"); | |
| 553 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 554 } | |
| 555 } | |
| 556 | |
| 557 /* return TRUE if a base table was read, FALSE for an extension table */ | |
| 558 static UBool | |
| 559 readFile(ConvData *data, const char* converterName, | |
| 560 UErrorCode *pErrorCode) { | |
| 561 char line[1024]; | |
| 562 char *end; | |
| 563 FileStream *convFile; | |
| 564 | |
| 565 UCMStates *baseStates; | |
| 566 UBool dataIsBase; | |
| 567 | |
| 568 if(U_FAILURE(*pErrorCode)) { | |
| 569 return FALSE; | |
| 570 } | |
| 571 | |
| 572 data->ucm=ucm_open(); | |
| 573 | |
| 574 convFile=T_FileStream_open(converterName, "r"); | |
| 575 if(convFile==NULL) { | |
| 576 *pErrorCode=U_FILE_ACCESS_ERROR; | |
| 577 return FALSE; | |
| 578 } | |
| 579 | |
| 580 readHeader(data, convFile, converterName, pErrorCode); | |
| 581 if(U_FAILURE(*pErrorCode)) { | |
| 582 return FALSE; | |
| 583 } | |
| 584 | |
| 585 if(data->ucm->baseName[0]==0) { | |
| 586 dataIsBase=TRUE; | |
| 587 baseStates=&data->ucm->states; | |
| 588 ucm_processStates(baseStates, IGNORE_SISO_CHECK); | |
| 589 } else { | |
| 590 dataIsBase=FALSE; | |
| 591 baseStates=NULL; | |
| 592 } | |
| 593 | |
| 594 /* read the base table */ | |
| 595 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); | |
| 596 if(U_FAILURE(*pErrorCode)) { | |
| 597 return FALSE; | |
| 598 } | |
| 599 | |
| 600 /* read an extension table if there is one */ | |
| 601 while(T_FileStream_readLine(convFile, line, sizeof(line))) { | |
| 602 end=uprv_strchr(line, 0); | |
| 603 while(line<end && | |
| 604 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\
t')) { | |
| 605 --end; | |
| 606 } | |
| 607 *end=0; | |
| 608 | |
| 609 if(line[0]=='#' || u_skipWhitespace(line)==end) { | |
| 610 continue; /* ignore empty and comment lines */ | |
| 611 } | |
| 612 | |
| 613 if(0==uprv_strcmp(line, "CHARMAP")) { | |
| 614 /* read the extension table */ | |
| 615 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode); | |
| 616 } else { | |
| 617 fprintf(stderr, "unexpected text after the base mapping table\n"); | |
| 618 } | |
| 619 break; | |
| 620 } | |
| 621 | |
| 622 T_FileStream_close(convFile); | |
| 623 | |
| 624 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType=
=UCM_FLAGS_MIXED) { | |
| 625 fprintf(stderr, "error: some entries have the mapping precision (with '|
'), some do not\n"); | |
| 626 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 627 } | |
| 628 | |
| 629 return dataIsBase; | |
| 630 } | |
| 631 | |
| 632 static void | |
| 633 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod
e) { | |
| 634 ConvData baseData; | |
| 635 UBool dataIsBase; | |
| 636 | |
| 637 UConverterStaticData *staticData; | |
| 638 UCMStates *states, *baseStates; | |
| 639 | |
| 640 if(U_FAILURE(*pErrorCode)) { | |
| 641 return; | |
| 642 } | |
| 643 | |
| 644 initConvData(data); | |
| 645 | |
| 646 dataIsBase=readFile(data, converterName, pErrorCode); | |
| 647 if(U_FAILURE(*pErrorCode)) { | |
| 648 return; | |
| 649 } | |
| 650 | |
| 651 staticData=&data->staticData; | |
| 652 states=&data->ucm->states; | |
| 653 | |
| 654 if(dataIsBase) { | |
| 655 /* | |
| 656 * Build a normal .cnv file with a base table | |
| 657 * and an optional extension table. | |
| 658 */ | |
| 659 data->cnvData=MBCSOpen(data->ucm); | |
| 660 if(data->cnvData==NULL) { | |
| 661 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
| 662 | |
| 663 } else if(!data->cnvData->isValid(data->cnvData, | |
| 664 staticData->subChar, staticData->subCharLen) | |
| 665 ) { | |
| 666 fprintf(stderr, " the substitution character byte sequence is
illegal in this codepage structure!\n"); | |
| 667 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 668 | |
| 669 } else if(staticData->subChar1!=0 && | |
| 670 !data->cnvData->isValid(data->cnvData, &staticData->subChar1
, 1) | |
| 671 ) { | |
| 672 fprintf(stderr, " the subchar1 byte is illegal in this codepag
e structure!\n"); | |
| 673 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 674 | |
| 675 } else if( | |
| 676 data->ucm->ext->mappingsLength>0 && | |
| 677 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm
->ext, FALSE) | |
| 678 ) { | |
| 679 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 680 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { | |
| 681 /* sort the table so that it can be turned into UTF-8-friendly data
*/ | |
| 682 ucm_sortTable(data->ucm->base); | |
| 683 } | |
| 684 | |
| 685 if(U_SUCCESS(*pErrorCode)) { | |
| 686 if( | |
| 687 /* add the base table after ucm_checkBaseExt()! */ | |
| 688 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->
staticData) | |
| 689 ) { | |
| 690 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 691 } else { | |
| 692 /* | |
| 693 * addTable() may have requested moving more mappings to the ext
ension table | |
| 694 * if they fit into the base toUnicode table but not into the | |
| 695 * base fromUnicode table. | |
| 696 * (Especially for UTF-8-friendly fromUnicode tables.) | |
| 697 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which c
auses them | |
| 698 * to be excluded from the extension toUnicode data. | |
| 699 * See MBCSOkForBaseFromUnicode() for which mappings do not fit
into | |
| 700 * the base fromUnicode table. | |
| 701 */ | |
| 702 ucm_moveMappings(data->ucm->base, data->ucm->ext); | |
| 703 ucm_sortTable(data->ucm->ext); | |
| 704 if(data->ucm->ext->mappingsLength>0) { | |
| 705 /* prepare the extension table, if there is one */ | |
| 706 data->extData=CnvExtOpen(data->ucm); | |
| 707 if(data->extData==NULL) { | |
| 708 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
| 709 } else if( | |
| 710 !data->extData->addTable(data->extData, data->ucm->ext,
&data->staticData) | |
| 711 ) { | |
| 712 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 713 } | |
| 714 } | |
| 715 } | |
| 716 } | |
| 717 } else { | |
| 718 /* Build an extension-only .cnv file. */ | |
| 719 char baseFilename[500]; | |
| 720 char *basename; | |
| 721 | |
| 722 initConvData(&baseData); | |
| 723 | |
| 724 /* assemble a path/filename for data->ucm->baseName */ | |
| 725 uprv_strcpy(baseFilename, converterName); | |
| 726 basename=(char *)findBasename(baseFilename); | |
| 727 uprv_strcpy(basename, data->ucm->baseName); | |
| 728 uprv_strcat(basename, ".ucm"); | |
| 729 | |
| 730 /* read the base table */ | |
| 731 dataIsBase=readFile(&baseData, baseFilename, pErrorCode); | |
| 732 if(U_FAILURE(*pErrorCode)) { | |
| 733 return; | |
| 734 } else if(!dataIsBase) { | |
| 735 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base tab
le file\n", baseFilename); | |
| 736 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 737 } else { | |
| 738 /* prepare the extension table */ | |
| 739 data->extData=CnvExtOpen(data->ucm); | |
| 740 if(data->extData==NULL) { | |
| 741 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
| 742 } else { | |
| 743 /* fill in gaps in extension file header fields */ | |
| 744 UCMapping *m, *mLimit; | |
| 745 uint8_t fallbackFlags; | |
| 746 | |
| 747 baseStates=&baseData.ucm->states; | |
| 748 if(states->conversionType==UCNV_DBCS) { | |
| 749 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2
); | |
| 750 } else if(states->minCharLength==0) { | |
| 751 staticData->minBytesPerChar=(int8_t)(states->minCharLength=b
aseStates->minCharLength); | |
| 752 } | |
| 753 if(states->maxCharLength<states->minCharLength) { | |
| 754 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=b
aseStates->maxCharLength); | |
| 755 } | |
| 756 | |
| 757 if(staticData->subCharLen==0) { | |
| 758 uprv_memcpy(staticData->subChar, baseData.staticData.subChar
, 4); | |
| 759 staticData->subCharLen=baseData.staticData.subCharLen; | |
| 760 } | |
| 761 /* | |
| 762 * do not copy subChar1 - | |
| 763 * only use what is explicitly specified | |
| 764 * because it cannot be unset in the extension file header | |
| 765 */ | |
| 766 | |
| 767 /* get the fallback flags */ | |
| 768 fallbackFlags=0; | |
| 769 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base-
>mappingsLength; | |
| 770 m<mLimit && fallbackFlags!=3; | |
| 771 ++m | |
| 772 ) { | |
| 773 if(m->f==1) { | |
| 774 fallbackFlags|=1; | |
| 775 } else if(m->f==3) { | |
| 776 fallbackFlags|=2; | |
| 777 } | |
| 778 } | |
| 779 | |
| 780 if(fallbackFlags&1) { | |
| 781 staticData->hasFromUnicodeFallback=TRUE; | |
| 782 } | |
| 783 if(fallbackFlags&2) { | |
| 784 staticData->hasToUnicodeFallback=TRUE; | |
| 785 } | |
| 786 | |
| 787 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData
->subCharLen)) { | |
| 788 fprintf(stderr, " the substitution character byte sequ
ence is illegal in this codepage structure!\n"); | |
| 789 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 790 | |
| 791 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseState
s, &staticData->subChar1, 1)) { | |
| 792 fprintf(stderr, " the subchar1 byte is illegal in this
codepage structure!\n"); | |
| 793 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 794 | |
| 795 } else if( | |
| 796 !ucm_checkValidity(data->ucm->ext, baseStates) || | |
| 797 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm-
>ext, data->ucm->ext, FALSE) | |
| 798 ) { | |
| 799 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 800 } else { | |
| 801 if(states->maxCharLength>1) { | |
| 802 /* | |
| 803 * When building a normal .cnv file with a base table | |
| 804 * for an MBCS (not SBCS) table with explicit precision
flags, | |
| 805 * the MBCSAddTable() function marks some mappings for m
oving | |
| 806 * to the extension table. | |
| 807 * They fit into the base toUnicode table but not into t
he | |
| 808 * base fromUnicode table. | |
| 809 * (Note: We do have explicit precision flags because th
ey are | |
| 810 * required for extension table generation, and | |
| 811 * ucm_checkBaseExt() verified it.) | |
| 812 * | |
| 813 * We do not call MBCSAddTable() here (we probably could
) | |
| 814 * so we need to do the analysis before building the ext
ension table. | |
| 815 * We assume that MBCSAddTable() will build a UTF-8-frie
ndly table. | |
| 816 * Redundant mappings in the extension table are ok exce
pt they cost some size. | |
| 817 * | |
| 818 * Do this after ucm_checkBaseExt(). | |
| 819 */ | |
| 820 const MBCSData *mbcsData=MBCSGetDummy(); | |
| 821 int32_t needsMove=0; | |
| 822 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.uc
m->base->mappingsLength; | |
| 823 m<mLimit; | |
| 824 ++m | |
| 825 ) { | |
| 826 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m
->bLen, m->u, m->f)) { | |
| 827 m->f|=MBCS_FROM_U_EXT_FLAG; | |
| 828 m->moveFlag=UCM_MOVE_TO_EXT; | |
| 829 ++needsMove; | |
| 830 } | |
| 831 } | |
| 832 | |
| 833 if(needsMove!=0) { | |
| 834 ucm_moveMappings(baseData.ucm->base, data->ucm->ext)
; | |
| 835 ucm_sortTable(data->ucm->ext); | |
| 836 } | |
| 837 } | |
| 838 if(!data->extData->addTable(data->extData, data->ucm->ext, &
data->staticData)) { | |
| 839 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
| 840 } | |
| 841 } | |
| 842 } | |
| 843 } | |
| 844 | |
| 845 cleanupConvData(&baseData); | |
| 846 } | |
| 847 } | |
| 848 | |
| 849 /* | |
| 850 * Hey, Emacs, please set the following: | |
| 851 * | |
| 852 * Local Variables: | |
| 853 * indent-tabs-mode: nil | |
| 854 * End: | |
| 855 * | |
| 856 */ | |
| OLD | NEW |