Index: icu46/source/tools/makeconv/makeconv.c |
=================================================================== |
--- icu46/source/tools/makeconv/makeconv.c (revision 0) |
+++ icu46/source/tools/makeconv/makeconv.c (revision 0) |
@@ -0,0 +1,858 @@ |
+/* |
+ ******************************************************************************** |
+ * |
+ * Copyright (C) 1998-2010, International Business Machines |
+ * Corporation and others. All Rights Reserved. |
+ * |
+ ******************************************************************************** |
+ * |
+ * |
+ * makeconv.c: |
+ * tool creating a binary (compressed) representation of the conversion mapping |
+ * table (IBM NLTC ucmap format). |
+ * |
+ * 05/04/2000 helena Added fallback mapping into the picture... |
+ * 06/29/2000 helena Major rewrite of the callback APIs. |
+ */ |
+ |
+#include <stdio.h> |
+#include "unicode/putil.h" |
+#include "unicode/ucnv_err.h" |
+#include "ucnv_bld.h" |
+#include "ucnv_imp.h" |
+#include "ucnv_cnv.h" |
+#include "cstring.h" |
+#include "cmemory.h" |
+#include "uinvchar.h" |
+#include "filestrm.h" |
+#include "toolutil.h" |
+#include "uoptions.h" |
+#include "unicode/udata.h" |
+#include "unewdata.h" |
+#include "uparse.h" |
+#include "ucm.h" |
+#include "makeconv.h" |
+#include "genmbcs.h" |
+ |
+/* Element count of a true array (not a pointer), converted to int32_t. */ |
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) |
+ |
+/* Set to non-zero to compile in the extra progress output inside the #if DEBUG sections. */ |
+#define DEBUG 0 |
+ |
+/* |
+ * Everything that is collected for one converter while it is being built. |
+ * Set up with initConvData() and released with cleanupConvData(). |
+ */ |
+typedef struct ConvData { |
+    UCMFile *ucm;                     /* opened in readFile(), closed in cleanupConvData() */ |
+    NewConverter *cnvData;            /* base table builder, or NULL */ |
+    NewConverter *extData;            /* extension table builder, or NULL */ |
+    UConverterSharedData sharedData;  /* its staticData pointer refers to the member below */ |
+    UConverterStaticData staticData;  /* first block written to the .cnv file */ |
+} ConvData; |
+ |
+/* |
+ * Reset *data to all zeroes, then record the structure sizes and |
+ * point the shared data at the embedded static data. |
+ */ |
+static void |
+initConvData(ConvData *data) { |
+    UConverterSharedData *shared; |
+    UConverterStaticData *fixed; |
+ |
+    uprv_memset(data, 0, sizeof(*data)); |
+ |
+    shared=&data->sharedData; |
+    fixed=&data->staticData; |
+ |
+    shared->structSize=sizeof(*shared); |
+    fixed->structSize=sizeof(*fixed); |
+    shared->staticData=fixed; |
+} |
+ |
+/* |
+ * Release what a ConvData owns: both table builders (if present) and the |
+ * UCMFile. The pointers are reset to NULL; a NULL data pointer is ignored. |
+ */ |
+static void |
+cleanupConvData(ConvData *data) { |
+    NewConverter *builder; |
+ |
+    if(data==NULL) { |
+        return; |
+    } |
+ |
+    builder=data->cnvData; |
+    if(builder!=NULL) { |
+        builder->close(builder); |
+        data->cnvData=NULL; |
+    } |
+ |
+    builder=data->extData; |
+    if(builder!=NULL) { |
+        builder->close(builder); |
+        data->extData=NULL; |
+    } |
+ |
+    ucm_close(data->ucm); |
+    data->ucm=NULL; |
+} |
+ |
+/* |
+ * from ucnvstat.c - static prototypes of data-based converters |
+ */ |
+extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]; |
+ |
+/* |
+ * Global option flags; main() sets them from the command line. |
+ */ |
+UBool VERBOSE = FALSE; /* --verbose: print progress messages */ |
+UBool SMALL = FALSE; /* --small: see the usage text in main() */ |
+UBool IGNORE_SISO_CHECK = FALSE; /* --ignore-siso-check: handed to ucm_processStates() */ |
+ |
+/* |
+ * Read the .ucm file converterName and prepare the table builders in *data. |
+ */ |
+static void |
+createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode); |
+ |
+/* |
+ * Set up the UNewData and write the converter. |
+ */ |
+static void |
+writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status); |
+ |
+/* |
+ * Selects whether writeConverterData() passes U_COPYRIGHT_STRING or NULL |
+ * to udata_create(). main() overwrites it with the --copyright option state. |
+ */ |
+UBool haveCopyright=TRUE; |
+ |
+/* |
+ * Header description handed to udata_create() for every .cnv file. |
+ * Not const: main() copies the ICU version from u_getVersion() into dataVersion. |
+ */ |
+static UDataInfo dataInfo={ |
+ sizeof(UDataInfo), |
+ 0, |
+ |
+ U_IS_BIG_ENDIAN, |
+ U_CHARSET_FAMILY, |
+ sizeof(UChar), |
+ 0, |
+ |
+ {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */ |
+ {6, 2, 0, 0}, /* formatVersion */ |
+ {0, 0, 0, 0} /* dataVersion (calculated at runtime) */ |
+}; |
+ |
+/* |
+ * Write one converter through the udata writer: the static data first, |
+ * then the base table and/or the extension table, whichever builders exist. |
+ * |
+ * data     the converter to write; cnvData and extData may each be NULL |
+ * cnvName  name handed to udata_create() (the type is always "cnv") |
+ * cnvDir   directory handed to udata_create() |
+ * status   in/out error code; nothing is done if it already indicates a failure. |
+ *          Set to U_INTERNAL_PROGRAM_ERROR if the number of bytes counted |
+ *          here differs from what udata_finish() reports. |
+ */ |
+static void |
+writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status) |
+{ |
+    UNewDataMemory *mem = NULL; |
+    uint32_t written; |
+    uint32_t size = 0; |
+    int32_t tableType; |
+ |
+    if(U_FAILURE(*status)) |
+    { |
+        return; |
+    } |
+ |
+    tableType=TABLE_NONE; |
+    if(data->cnvData!=NULL) { |
+        tableType|=TABLE_BASE; |
+    } |
+    if(data->extData!=NULL) { |
+        tableType|=TABLE_EXT; |
+    } |
+ |
+    mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status); |
+ |
+    if(U_FAILURE(*status)) |
+    { |
+        fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", |
+                cnvName, |
+                "cnv", |
+                u_errorName(*status)); |
+        return; |
+    } |
+ |
+    if(VERBOSE) |
+    { |
+        printf("- Opened udata %s.%s\n", cnvName, "cnv"); |
+    } |
+ |
+ |
+    /* all read only, clean, platform independent data. Mmmm. :) */ |
+    udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); |
+    size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ |
+    /* Now, write the table */ |
+    if(tableType&TABLE_BASE) { |
+        size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType); |
+    } |
+    if(tableType&TABLE_EXT) { |
+        size += data->extData->write(data->extData, &data->staticData, mem, tableType); |
+    } |
+ |
+    written = udata_finish(mem, status); |
+    if(U_FAILURE(*status)) |
+    { |
+        /* |
+         * Keep the error reported by udata_finish(): the size comparison below |
+         * would replace it with U_INTERNAL_PROGRAM_ERROR, and nothing useful |
+         * was written that the verbose message could report. |
+         */ |
+        return; |
+    } |
+    if(size != written) |
+    { |
+        /* %u needs unsigned int arguments */ |
+        fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", |
+                (unsigned int)written, (unsigned int)size); |
+        *status=U_INTERNAL_PROGRAM_ERROR; |
+    } |
+    if(VERBOSE) |
+    { |
+        printf("- Wrote %u bytes to the udata.\n", (unsigned int)written); |
+    } |
+} |
+ |
+/* |
+ * Indexes into options[] below; the order here must match the order of |
+ * the initializers there. OPT_COUNT is the number of options. |
+ */ |
+enum { |
+ OPT_HELP_H, |
+ OPT_HELP_QUESTION_MARK, |
+ OPT_COPYRIGHT, |
+ OPT_VERSION, |
+ OPT_DESTDIR, |
+ OPT_VERBOSE, |
+ OPT_SMALL, |
+ OPT_IGNORE_SISO_CHECK, |
+ OPT_COUNT |
+}; |
+ |
+/* |
+ * Command-line options parsed by u_parseArgs() in main(). |
+ * The last two have only a long name and take no argument. |
+ */ |
+static UOption options[]={ |
+ UOPTION_HELP_H, |
+ UOPTION_HELP_QUESTION_MARK, |
+ UOPTION_COPYRIGHT, |
+ UOPTION_VERSION, |
+ UOPTION_DESTDIR, |
+ UOPTION_VERBOSE, |
+ { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, |
+ { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 } |
+}; |
+ |
+/* |
+ * Command-line entry point: builds one .cnv file for each .ucm file named |
+ * on the command line. |
+ * |
+ * A failure for one input file is reported on stderr and the remaining |
+ * files are still processed; the return value is the first error code seen |
+ * (U_ZERO_ERROR if there was none). A path that does not fit into the |
+ * fixed-size name buffers ends the run with U_BUFFER_OVERFLOW_ERROR. |
+ */ |
+int main(int argc, char* argv[]) |
+{ |
+    ConvData data; |
+    UErrorCode err = U_ZERO_ERROR, localError; |
+    char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; |
+    const char* destdir, *arg, *source; |
+    size_t destdirlen; |
+    char* dot = NULL, *outBasename; |
+    char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; |
+    char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; |
+    UVersionInfo icuVersion; |
+    UBool printFilename; |
+ |
+    U_MAIN_INIT_ARGS(argc, argv); |
+ |
+    /* Set up the ICU version number */ |
+    u_getVersion(icuVersion); |
+    uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); |
+ |
+    /* preset then read command line options */ |
+    options[OPT_DESTDIR].value=u_getDataDirectory(); |
+    argc=u_parseArgs(argc, argv, LENGTHOF(options), options); |
+ |
+    /* error handling, printing usage message */ |
+    if(argc<0) { |
+        fprintf(stderr, |
+            "error in command line argument \"%s\"\n", |
+            argv[-argc]); |
+    } else if(argc<2) { |
+        argc=-1; |
+    } |
+    if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { |
+        FILE *stdfile=argc<0 ? stderr : stdout; |
+        fprintf(stdfile, |
+            "usage: %s [-options] files...\n" |
+            "\tread .ucm codepage mapping files and write .cnv files\n" |
+            "options:\n" |
+            "\t-h or -? or --help this usage text\n" |
+            "\t-V or --version show a version message\n" |
+            "\t-c or --copyright include a copyright notice\n" |
+            "\t-d or --destdir destination directory, followed by the path\n" |
+            "\t-v or --verbose Turn on verbose output\n", |
+            argv[0]); |
+        fprintf(stdfile, |
+            "\t --small Generate smaller .cnv files. They will be\n" |
+            "\t significantly smaller but may not be compatible with\n" |
+            "\t older versions of ICU and will require heap memory\n" |
+            "\t allocation when loaded.\n" |
+            "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n"); |
+        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; |
+    } |
+ |
+    if(options[OPT_VERSION].doesOccur) { |
+        printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n", |
+            dataInfo.formatVersion[0], dataInfo.formatVersion[1]); |
+        printf("%s\n", U_COPYRIGHT_STRING); |
+        exit(0); |
+    } |
+ |
+    /* get the options values */ |
+    haveCopyright = options[OPT_COPYRIGHT].doesOccur; |
+    destdir = options[OPT_DESTDIR].value; |
+    VERBOSE = options[OPT_VERBOSE].doesOccur; |
+    SMALL = options[OPT_SMALL].doesOccur; |
+ |
+    if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { |
+        IGNORE_SISO_CHECK = TRUE; |
+    } |
+ |
+    if (destdir != NULL && *destdir != 0) { |
+        destdirlen = uprv_strlen(destdir); |
+        /* the directory, a separator that may have to be appended, and a NUL must fit */ |
+        if (destdirlen + 1 >= sizeof(outFileName)) { |
+            fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); |
+            return U_BUFFER_OVERFLOW_ERROR; |
+        } |
+        uprv_strcpy(outFileName, destdir); |
+        outBasename = outFileName + destdirlen; |
+        if (*(outBasename - 1) != U_FILE_SEP_CHAR) { |
+            *outBasename++ = U_FILE_SEP_CHAR; |
+            ++destdirlen; |
+        } |
+    } else { |
+        destdirlen = 0; |
+        outBasename = outFileName; |
+    } |
+ |
+#if DEBUG |
+    { |
+        int i; |
+        printf("makeconv: processing %d files...\n", argc - 1); |
+        for(i=1; i<argc; ++i) { |
+            printf("%s ", argv[i]); |
+        } |
+        printf("\n"); |
+        fflush(stdout); |
+    } |
+#endif |
+ |
+    printFilename = (UBool) (argc > 2 || VERBOSE); |
+    for (++argv; --argc; ++argv) |
+    { |
+        arg = getLongPathname(*argv); |
+ |
+        /* |
+         * With a destination directory only the basename of the input is used, |
+         * otherwise the whole input path becomes the output path. |
+         */ |
+        source = destdirlen != 0 ? findBasename(arg) : arg; |
+ |
+        /* |
+         * Check for potential buffer overflow: |
+         * the input path itself is limited, and the destination directory |
+         * plus the name plus the terminating NUL must fit into outFileName. |
+         */ |
+        if(uprv_strlen(arg) > UCNV_MAX_FULL_FILE_NAME_LENGTH || |
+           destdirlen + uprv_strlen(source) >= sizeof(outFileName)) |
+        { |
+            fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); |
+            return U_BUFFER_OVERFLOW_ERROR; |
+        } |
+ |
+        /* produces the right destination path for display */ |
+        /* (without a destination directory outBasename==outFileName) */ |
+        uprv_strcpy(outBasename, source); |
+ |
+        /* removes the extension if any is found */ |
+        dot = uprv_strrchr(outBasename, '.'); |
+        if (dot) |
+        { |
+            *dot = '\0'; |
+        } |
+ |
+        /* the basename without extension is the converter name */ |
+        uprv_strcpy(cnvName, outBasename); |
+ |
+        /* the target extension may be longer than the one that was removed */ |
+        if(uprv_strlen(outFileName) + uprv_strlen(CONVERTER_FILE_EXTENSION) >= sizeof(outFileName)) |
+        { |
+            fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); |
+            return U_BUFFER_OVERFLOW_ERROR; |
+        } |
+ |
+        /* Adds the target extension */ |
+        uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); |
+ |
+#if DEBUG |
+        printf("makeconv: processing %s ...\n", arg); |
+        fflush(stdout); |
+#endif |
+        localError = U_ZERO_ERROR; |
+        initConvData(&data); |
+        createConverter(&data, arg, &localError); |
+ |
+        if (U_FAILURE(localError)) |
+        { |
+            /* if an error is found, print out an error msg and keep going */ |
+            fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg, |
+                u_errorName(localError)); |
+            if(U_SUCCESS(err)) { |
+                err = localError; |
+            } |
+        } |
+        else |
+        { |
+            /* Ensure the static data name matches the file name. */ |
+            /* Ignore the directory and only compare the base name. */ |
+            char *p; |
+            p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */ |
+            if(p == NULL) /* OK, try alternate */ |
+            { |
+                p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); |
+            } |
+            /* skip whichever separator was found; without one compare the whole name */ |
+            p = (p == NULL) ? cnvName : p + 1; |
+ |
+            if(uprv_stricmp(p,data.staticData.name)) |
+            { |
+                fprintf(stderr, "Warning: %s%s claims to be '%s'\n", |
+                    cnvName, CONVERTER_FILE_EXTENSION, |
+                    data.staticData.name); |
+            } |
+ |
+            if(uprv_strlen(cnvName) >= sizeof(data.staticData.name)) |
+            { |
+                /* the name field in the .cnv header is a fixed-size array */ |
+                fprintf(stderr, |
+                        "Error: The converter name \"%s\" is too long for the .cnv header.\n", |
+                        cnvName); |
+                if(U_SUCCESS(err)) { |
+                    err = U_BUFFER_OVERFLOW_ERROR; |
+                } |
+            } |
+            else |
+            { |
+                uprv_strcpy((char*)data.staticData.name, cnvName); |
+ |
+                if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { |
+                    fprintf(stderr, |
+                        "Error: A converter name must contain only invariant characters.\n" |
+                        "%s is not a valid converter name.\n", |
+                        data.staticData.name); |
+                    if(U_SUCCESS(err)) { |
+                        err = U_INVALID_TABLE_FORMAT; |
+                    } |
+                } |
+ |
+                uprv_strcpy(cnvNameWithPkg, cnvName); |
+ |
+                localError = U_ZERO_ERROR; |
+                writeConverterData(&data, cnvNameWithPkg, destdir, &localError); |
+ |
+                if(U_FAILURE(localError)) |
+                { |
+                    /* if an error is found, print out an error msg and keep going */ |
+                    fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg, |
+                        u_errorName(localError)); |
+                    if(U_SUCCESS(err)) { |
+                        err = localError; |
+                    } |
+                } |
+                else if (printFilename) |
+                { |
+                    puts(outBasename); |
+                } |
+            } |
+        } |
+        fflush(stdout); |
+        fflush(stderr); |
+ |
+        cleanupConvData(&data); |
+    } |
+ |
+    return err; |
+} |
+ |
+/* |
+ * Derive platform and CCSID from a converter name. |
+ * A name that starts with "ibm" (any letter case), optionally followed by |
+ * one '-', yields UCNV_IBM and the decimal number parsed from what follows. |
+ * Any other name yields UCNV_UNKNOWN and a CCSID of 0. |
+ */ |
+static void |
+getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) { |
+    const char *digits; |
+    UBool hasIbmPrefix; |
+ |
+    hasIbmPrefix=(UBool)( |
+        (name[0]=='i' || name[0]=='I') && |
+        (name[1]=='b' || name[1]=='B') && |
+        (name[2]=='m' || name[2]=='M')); |
+ |
+    if(!hasIbmPrefix) { |
+        *pPlatform=UCNV_UNKNOWN; |
+        *pCCSID=0; |
+        return; |
+    } |
+ |
+    digits=name+3; |
+    if(*digits=='-') { |
+        ++digits; |
+    } |
+    *pPlatform=UCNV_IBM; |
+    *pCCSID=(int32_t)uprv_strtoul(digits, NULL, 10); |
+} |
+ |
+/* |
+ * Read the header part of a .ucm file, up to the "CHARMAP" line, and fill in |
+ * data->staticData from it. |
+ * |
+ * State-related lines are consumed by ucm_parseHeaderLine(); of the remaining |
+ * keys only code_set_name, subchar and subchar1 are used, others are ignored. |
+ * For a file without a base name, fields that are still unset afterwards |
+ * are copied from the prototype for the conversion type. |
+ * |
+ * data           receives the header values; data->ucm must already be open |
+ * convFile       the open .ucm file, positioned at its start |
+ * converterName  not used here |
+ * pErrorCode     in/out error code; set to U_INVALID_TABLE_FORMAT for an |
+ *                over-long name, an illegal subchar/subchar1, a missing |
+ *                conversion type, or a subchar1 on an unsuitable type |
+ */ |
+static void |
+readHeader(ConvData *data, |
+           FileStream* convFile, |
+           const char* converterName, |
+           UErrorCode *pErrorCode) { |
+    char line[200]; |
+    char *s, *key, *value; |
+    const UConverterStaticData *prototype; |
+    UConverterStaticData *staticData; |
+ |
+    if(U_FAILURE(*pErrorCode)) { |
+        return; |
+    } |
+ |
+    staticData=&data->staticData; |
+    staticData->platform=UCNV_IBM; |
+    staticData->subCharLen=0; |
+ |
+    while(T_FileStream_readLine(convFile, line, sizeof(line))) { |
+        /* basic parsing and handling of state-related items */ |
+        if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { |
+            continue; |
+        } |
+ |
+        /* stop at the beginning of the mapping section */ |
+        if(uprv_strcmp(line, "CHARMAP")==0) { |
+            break; |
+        } |
+ |
+        /* collect the information from the header field, ignore unknown keys */ |
+        if(uprv_strcmp(key, "code_set_name")==0) { |
+            if(*value!=0) { |
+                /* the name field is a fixed-size array, a line can hold more than that */ |
+                if(uprv_strlen(value)>=sizeof(staticData->name)) { |
+                    fprintf(stderr, "error: <code_set_name> %s is too long\n", value); |
+                    *pErrorCode=U_INVALID_TABLE_FORMAT; |
+                    return; |
+                } |
+                uprv_strcpy((char *)staticData->name, value); |
+                getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage); |
+            } |
+        } else if(uprv_strcmp(key, "subchar")==0) { |
+            uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
+            int8_t length; |
+ |
+            s=value; |
+            length=ucm_parseBytes(bytes, line, (const char **)&s); |
+            if(1<=length && length<=4 && *s==0) { |
+                staticData->subCharLen=length; |
+                uprv_memcpy(staticData->subChar, bytes, length); |
+            } else { |
+                fprintf(stderr, "error: illegal <subchar> %s\n", value); |
+                *pErrorCode=U_INVALID_TABLE_FORMAT; |
+                return; |
+            } |
+        } else if(uprv_strcmp(key, "subchar1")==0) { |
+            uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
+ |
+            s=value; |
+            if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { |
+                staticData->subChar1=bytes[0]; |
+            } else { |
+                fprintf(stderr, "error: illegal <subchar1> %s\n", value); |
+                *pErrorCode=U_INVALID_TABLE_FORMAT; |
+                return; |
+            } |
+        } |
+    } |
+ |
+    /* copy values from the UCMFile to the static data */ |
+    staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; |
+    staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; |
+    staticData->conversionType=data->ucm->states.conversionType; |
+ |
+    if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { |
+        fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); |
+        *pErrorCode=U_INVALID_TABLE_FORMAT; |
+        return; |
+    } |
+ |
+    /* |
+     * Now that we know the type, copy any 'default' values from the table. |
+     * We need not check the type any further because the parser only |
+     * recognizes what we have prototypes for. |
+     * |
+     * For delta (extension-only) tables, copy values from the base file |
+     * instead, see createConverter(). |
+     */ |
+    if(data->ucm->baseName[0]==0) { |
+        prototype=ucnv_converterStaticData[staticData->conversionType]; |
+        if(prototype!=NULL) { |
+            if(staticData->name[0]==0) { |
+                uprv_strcpy((char *)staticData->name, prototype->name); |
+            } |
+ |
+            if(staticData->codepage==0) { |
+                staticData->codepage=prototype->codepage; |
+            } |
+ |
+            if(staticData->platform==0) { |
+                staticData->platform=prototype->platform; |
+            } |
+ |
+            if(staticData->minBytesPerChar==0) { |
+                staticData->minBytesPerChar=prototype->minBytesPerChar; |
+            } |
+ |
+            if(staticData->maxBytesPerChar==0) { |
+                staticData->maxBytesPerChar=prototype->maxBytesPerChar; |
+            } |
+ |
+            if(staticData->subCharLen==0) { |
+                staticData->subCharLen=prototype->subCharLen; |
+                if(prototype->subCharLen>0) { |
+                    uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen); |
+                } |
+            } |
+        } |
+    } |
+ |
+    /* a negative output type means "not set": derive it from the maximum character length */ |
+    if(data->ucm->states.outputType<0) { |
+        data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; |
+    } |
+ |
+    if( staticData->subChar1!=0 && |
+            (staticData->minBytesPerChar>1 || |
+                (staticData->conversionType!=UCNV_MBCS && |
+                 staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) |
+    ) { |
+        fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n"); |
+        *pErrorCode=U_INVALID_TABLE_FORMAT; |
+    } |
+} |
+ |
+/* |
+ * Open and parse one .ucm file into data->ucm: the header, the base mapping |
+ * table, and an optional second "CHARMAP" section with an extension table. |
+ * |
+ * data           receives the newly opened UCMFile and the header values |
+ * converterName  path of the .ucm file to read |
+ * pErrorCode     in/out error code; U_FILE_ACCESS_ERROR if the file cannot be |
+ *                opened, U_INVALID_TABLE_FORMAT if precision flags are mixed |
+ * |
+ * return TRUE if a base table was read, FALSE for an extension table |
+ * (also FALSE on every early error return). |
+ * The file is closed again on every path that opened it. |
+ */ |
+static UBool |
+readFile(ConvData *data, const char* converterName, |
+         UErrorCode *pErrorCode) { |
+    char line[200]; |
+    char *end; |
+    FileStream *convFile; |
+ |
+    UCMStates *baseStates; |
+    UBool dataIsBase; |
+ |
+    if(U_FAILURE(*pErrorCode)) { |
+        return FALSE; |
+    } |
+ |
+    data->ucm=ucm_open(); |
+ |
+    convFile=T_FileStream_open(converterName, "r"); |
+    if(convFile==NULL) { |
+        *pErrorCode=U_FILE_ACCESS_ERROR; |
+        return FALSE; |
+    } |
+ |
+    readHeader(data, convFile, converterName, pErrorCode); |
+    if(U_FAILURE(*pErrorCode)) { |
+        T_FileStream_close(convFile); /* do not leak the stream on a bad header */ |
+        return FALSE; |
+    } |
+ |
+    if(data->ucm->baseName[0]==0) { |
+        dataIsBase=TRUE; |
+        baseStates=&data->ucm->states; |
+        ucm_processStates(baseStates, IGNORE_SISO_CHECK); |
+    } else { |
+        dataIsBase=FALSE; |
+        baseStates=NULL; |
+    } |
+ |
+    /* read the base table */ |
+    ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); |
+    if(U_FAILURE(*pErrorCode)) { |
+        T_FileStream_close(convFile); /* do not leak the stream on a bad table */ |
+        return FALSE; |
+    } |
+ |
+    /* read an extension table if there is one */ |
+    while(T_FileStream_readLine(convFile, line, sizeof(line))) { |
+        /* cut off trailing line ends and white space */ |
+        end=uprv_strchr(line, 0); |
+        while(line<end && |
+              (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) { |
+            --end; |
+        } |
+        *end=0; |
+ |
+        if(line[0]=='#' || u_skipWhitespace(line)==end) { |
+            continue; /* ignore empty and comment lines */ |
+        } |
+ |
+        if(0==uprv_strcmp(line, "CHARMAP")) { |
+            /* read the extension table */ |
+            ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode); |
+        } else { |
+            fprintf(stderr, "unexpected text after the base mapping table\n"); |
+        } |
+        /* only the first line with content is looked at */ |
+        break; |
+    } |
+ |
+    T_FileStream_close(convFile); |
+ |
+    if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) { |
+        fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n"); |
+        *pErrorCode=U_INVALID_TABLE_FORMAT; |
+    } |
+ |
+    return dataIsBase; |
+} |
+ |
+/* |
+ * Read the .ucm file converterName and prepare the table builders in *data. |
+ * |
+ * A file with a base table gets an MBCS builder in data->cnvData and, if any |
+ * mappings end up in the extension table, an extension builder in |
+ * data->extData. An extension-only file names its base file in |
+ * data->ucm->baseName; that file is read from the same directory (with |
+ * ".ucm" appended), used for validation, and released again, and only |
+ * data->extData is created. |
+ * |
+ * data           reinitialized here and then filled in; the caller releases |
+ *                it with cleanupConvData(), also after a failure |
+ * converterName  path of the .ucm file |
+ * pErrorCode     in/out error code; nothing is done if it already indicates |
+ *                a failure |
+ */ |
+static void |
+createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) { |
+    ConvData baseData; |
+    UBool dataIsBase; |
+ |
+    UConverterStaticData *staticData; |
+    UCMStates *states, *baseStates; |
+ |
+    if(U_FAILURE(*pErrorCode)) { |
+        return; |
+    } |
+ |
+    initConvData(data); |
+ |
+    dataIsBase=readFile(data, converterName, pErrorCode); |
+    if(U_FAILURE(*pErrorCode)) { |
+        return; |
+    } |
+ |
+    staticData=&data->staticData; |
+    states=&data->ucm->states; |
+ |
+    if(dataIsBase) { |
+        /* |
+         * Build a normal .cnv file with a base table |
+         * and an optional extension table. |
+         */ |
+        data->cnvData=MBCSOpen(data->ucm); |
+        if(data->cnvData==NULL) { |
+            *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
+ |
+        } else if(!data->cnvData->isValid(data->cnvData, |
+                            staticData->subChar, staticData->subCharLen) |
+        ) { |
+            fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); |
+            *pErrorCode=U_INVALID_TABLE_FORMAT; |
+ |
+        } else if(staticData->subChar1!=0 && |
+                    !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1) |
+        ) { |
+            fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); |
+            *pErrorCode=U_INVALID_TABLE_FORMAT; |
+ |
+        } else if( |
+            data->ucm->ext->mappingsLength>0 && |
+            !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) |
+        ) { |
+            *pErrorCode=U_INVALID_TABLE_FORMAT; |
+        } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { |
+            /* sort the table so that it can be turned into UTF-8-friendly data */ |
+            ucm_sortTable(data->ucm->base); |
+        } |
+ |
+        if(U_SUCCESS(*pErrorCode)) { |
+            if( |
+                /* add the base table after ucm_checkBaseExt()! */ |
+                !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) |
+            ) { |
+                *pErrorCode=U_INVALID_TABLE_FORMAT; |
+            } else { |
+                /* |
+                 * addTable() may have requested moving more mappings to the extension table |
+                 * if they fit into the base toUnicode table but not into the |
+                 * base fromUnicode table. |
+                 * (Especially for UTF-8-friendly fromUnicode tables.) |
+                 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them |
+                 * to be excluded from the extension toUnicode data. |
+                 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into |
+                 * the base fromUnicode table. |
+                 */ |
+                ucm_moveMappings(data->ucm->base, data->ucm->ext); |
+                ucm_sortTable(data->ucm->ext); |
+                if(data->ucm->ext->mappingsLength>0) { |
+                    /* prepare the extension table, if there is one */ |
+                    data->extData=CnvExtOpen(data->ucm); |
+                    if(data->extData==NULL) { |
+                        *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
+                    } else if( |
+                        !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) |
+                    ) { |
+                        *pErrorCode=U_INVALID_TABLE_FORMAT; |
+                    } |
+                } |
+            } |
+        } |
+    } else { |
+        /* Build an extension-only .cnv file. */ |
+        char baseFilename[500]; |
+        const char *inputBasename; |
+        size_t dirLength; |
+ |
+        /* |
+         * assemble a path/filename for data->ucm->baseName: |
+         * the directory part of converterName, the base name, and ".ucm" |
+         */ |
+        inputBasename=findBasename(converterName); |
+        dirLength=(size_t)(inputBasename-converterName); |
+        if(dirLength+uprv_strlen(data->ucm->baseName)+uprv_strlen(".ucm")>=sizeof(baseFilename)) { |
+            /* would not fit into baseFilename; nothing has been allocated for baseData yet */ |
+            fprintf(stderr, "error: the path for the <icu:base> file \"%s\" is too long\n", data->ucm->baseName); |
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+            return; |
+        } |
+        uprv_memcpy(baseFilename, converterName, dirLength); |
+        uprv_strcpy(baseFilename+dirLength, data->ucm->baseName); |
+        uprv_strcat(baseFilename, ".ucm"); |
+ |
+        initConvData(&baseData); |
+ |
+        /* read the base table */ |
+        dataIsBase=readFile(&baseData, baseFilename, pErrorCode); |
+        if(U_FAILURE(*pErrorCode)) { |
+            /* nothing more to do here; baseData is released below */ |
+        } else if(!dataIsBase) { |
+            fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename); |
+            *pErrorCode=U_INVALID_TABLE_FORMAT; |
+        } else { |
+            /* prepare the extension table */ |
+            data->extData=CnvExtOpen(data->ucm); |
+            if(data->extData==NULL) { |
+                *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
+            } else { |
+                /* fill in gaps in extension file header fields */ |
+                UCMapping *m, *mLimit; |
+                uint8_t fallbackFlags; |
+ |
+                baseStates=&baseData.ucm->states; |
+                if(states->conversionType==UCNV_DBCS) { |
+                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=2); |
+                } else if(states->minCharLength==0) { |
+                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength); |
+                } |
+                if(states->maxCharLength<states->minCharLength) { |
+                    staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength); |
+                } |
+ |
+                if(staticData->subCharLen==0) { |
+                    uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4); |
+                    staticData->subCharLen=baseData.staticData.subCharLen; |
+                } |
+                /* |
+                 * do not copy subChar1 - |
+                 * only use what is explicitly specified |
+                 * because it cannot be unset in the extension file header |
+                 */ |
+ |
+                /* get the fallback flags: bit 0 for mappings with f==1, bit 1 for f==3 */ |
+                fallbackFlags=0; |
+                for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; |
+                    m<mLimit && fallbackFlags!=3; |
+                    ++m |
+                ) { |
+                    if(m->f==1) { |
+                        fallbackFlags|=1; |
+                    } else if(m->f==3) { |
+                        fallbackFlags|=2; |
+                    } |
+                } |
+ |
+                if(fallbackFlags&1) { |
+                    staticData->hasFromUnicodeFallback=TRUE; |
+                } |
+                if(fallbackFlags&2) { |
+                    staticData->hasToUnicodeFallback=TRUE; |
+                } |
+ |
+                if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) { |
+                    fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); |
+                    *pErrorCode=U_INVALID_TABLE_FORMAT; |
+ |
+                } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { |
+                    fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); |
+                    *pErrorCode=U_INVALID_TABLE_FORMAT; |
+ |
+                } else if( |
+                    !ucm_checkValidity(data->ucm->ext, baseStates) || |
+                    !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) |
+                ) { |
+                    *pErrorCode=U_INVALID_TABLE_FORMAT; |
+                } else { |
+                    if(states->maxCharLength>1) { |
+                        /* |
+                         * When building a normal .cnv file with a base table |
+                         * for an MBCS (not SBCS) table with explicit precision flags, |
+                         * the MBCSAddTable() function marks some mappings for moving |
+                         * to the extension table. |
+                         * They fit into the base toUnicode table but not into the |
+                         * base fromUnicode table. |
+                         * (Note: We do have explicit precision flags because they are |
+                         * required for extension table generation, and |
+                         * ucm_checkBaseExt() verified it.) |
+                         * |
+                         * We do not call MBCSAddTable() here (we probably could) |
+                         * so we need to do the analysis before building the extension table. |
+                         * We assume that MBCSAddTable() will build a UTF-8-friendly table. |
+                         * Redundant mappings in the extension table are ok except they cost some size. |
+                         * |
+                         * Do this after ucm_checkBaseExt(). |
+                         */ |
+                        const MBCSData *mbcsData=MBCSGetDummy(); |
+                        int32_t needsMove=0; |
+                        for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; |
+                            m<mLimit; |
+                            ++m |
+                        ) { |
+                            if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) { |
+                                m->f|=MBCS_FROM_U_EXT_FLAG; |
+                                m->moveFlag=UCM_MOVE_TO_EXT; |
+                                ++needsMove; |
+                            } |
+                        } |
+ |
+                        if(needsMove!=0) { |
+                            ucm_moveMappings(baseData.ucm->base, data->ucm->ext); |
+                            ucm_sortTable(data->ucm->ext); |
+                        } |
+                    } |
+                    if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) { |
+                        *pErrorCode=U_INVALID_TABLE_FORMAT; |
+                    } |
+                } |
+            } |
+        } |
+ |
+        /* release the base file data on success and on every failure after readFile() */ |
+        cleanupConvData(&baseData); |
+    } |
+} |
+ |
+/* |
+ * Hey, Emacs, please set the following: |
+ * |
+ * Local Variables: |
+ * indent-tabs-mode: nil |
+ * End: |
+ * |
+ */ |
Property changes on: icu46/source/tools/makeconv/makeconv.c |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |