OLD | NEW |
| (Empty) |
1 /* | |
2 *******************************************************************************
* | |
3 * | |
4 * Copyright (C) 1998-2014, International Business Machines | |
5 * Corporation and others. All Rights Reserved. | |
6 * | |
7 *******************************************************************************
* | |
8 * | |
9 * | |
10 * makeconv.c: | |
11 * tool creating a binary (compressed) representation of the conversion mapping | |
12 * table (IBM NLTC ucmap format). | |
13 * | |
14 * 05/04/2000 helena Added fallback mapping into the picture... | |
15 * 06/29/2000 helena Major rewrite of the callback APIs. | |
16 */ | |
17 | |
18 #include <stdio.h> | |
19 #include "unicode/putil.h" | |
20 #include "unicode/ucnv_err.h" | |
21 #include "ucnv_bld.h" | |
22 #include "ucnv_imp.h" | |
23 #include "ucnv_cnv.h" | |
24 #include "cstring.h" | |
25 #include "cmemory.h" | |
26 #include "uinvchar.h" | |
27 #include "filestrm.h" | |
28 #include "toolutil.h" | |
29 #include "uoptions.h" | |
30 #include "unicode/udata.h" | |
31 #include "unewdata.h" | |
32 #include "uparse.h" | |
33 #include "ucm.h" | |
34 #include "makeconv.h" | |
35 #include "genmbcs.h" | |
36 | |
37 #define DEBUG 0 | |
38 | |
39 typedef struct ConvData { | |
40 UCMFile *ucm; | |
41 NewConverter *cnvData, *extData; | |
42 UConverterSharedData sharedData; | |
43 UConverterStaticData staticData; | |
44 } ConvData; | |
45 | |
46 static void | |
47 initConvData(ConvData *data) { | |
48 uprv_memset(data, 0, sizeof(ConvData)); | |
49 data->sharedData.structSize=sizeof(UConverterSharedData); | |
50 data->staticData.structSize=sizeof(UConverterStaticData); | |
51 data->sharedData.staticData=&data->staticData; | |
52 } | |
53 | |
54 static void | |
55 cleanupConvData(ConvData *data) { | |
56 if(data!=NULL) { | |
57 if(data->cnvData!=NULL) { | |
58 data->cnvData->close(data->cnvData); | |
59 data->cnvData=NULL; | |
60 } | |
61 if(data->extData!=NULL) { | |
62 data->extData->close(data->extData); | |
63 data->extData=NULL; | |
64 } | |
65 ucm_close(data->ucm); | |
66 data->ucm=NULL; | |
67 } | |
68 } | |
69 | |
70 /* | |
71 * from ucnvstat.c - static prototypes of data-based converters | |
72 */ | |
73 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPP
ORTED_CONVERTER_TYPES]; | |
74 | |
75 /* | |
76 * Global - verbosity | |
77 */ | |
78 UBool VERBOSE = FALSE; | |
79 UBool SMALL = FALSE; | |
80 UBool IGNORE_SISO_CHECK = FALSE; | |
81 | |
82 static void | |
83 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCod
e); | |
84 | |
85 /* | |
86 * Set up the UNewData and write the converter.. | |
87 */ | |
88 static void | |
89 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr
orCode *status); | |
90 | |
91 UBool haveCopyright=TRUE; | |
92 | |
93 static UDataInfo dataInfo={ | |
94 sizeof(UDataInfo), | |
95 0, | |
96 | |
97 U_IS_BIG_ENDIAN, | |
98 U_CHARSET_FAMILY, | |
99 sizeof(UChar), | |
100 0, | |
101 | |
102 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */ | |
103 {6, 2, 0, 0}, /* formatVersion */ | |
104 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */ | |
105 }; | |
106 | |
107 static void | |
108 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr
orCode *status) | |
109 { | |
110 UNewDataMemory *mem = NULL; | |
111 uint32_t sz2; | |
112 uint32_t size = 0; | |
113 int32_t tableType; | |
114 | |
115 if(U_FAILURE(*status)) | |
116 { | |
117 return; | |
118 } | |
119 | |
120 tableType=TABLE_NONE; | |
121 if(data->cnvData!=NULL) { | |
122 tableType|=TABLE_BASE; | |
123 } | |
124 if(data->extData!=NULL) { | |
125 tableType|=TABLE_EXT; | |
126 } | |
127 | |
128 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPY
RIGHT_STRING : NULL, status); | |
129 | |
130 if(U_FAILURE(*status)) | |
131 { | |
132 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", | |
133 cnvName, | |
134 "cnv", | |
135 u_errorName(*status)); | |
136 return; | |
137 } | |
138 | |
139 if(VERBOSE) | |
140 { | |
141 printf("- Opened udata %s.%s\n", cnvName, "cnv"); | |
142 } | |
143 | |
144 | |
145 /* all read only, clean, platform independent data. Mmmm. :) */ | |
146 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); | |
147 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ | |
148 /* Now, write the table */ | |
149 if(tableType&TABLE_BASE) { | |
150 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tabl
eType); | |
151 } | |
152 if(tableType&TABLE_EXT) { | |
153 size += data->extData->write(data->extData, &data->staticData, mem, tabl
eType); | |
154 } | |
155 | |
156 sz2 = udata_finish(mem, status); | |
157 if(size != sz2) | |
158 { | |
159 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u b
ytes\n", (int)sz2, (int)size); | |
160 *status=U_INTERNAL_PROGRAM_ERROR; | |
161 } | |
162 if(VERBOSE) | |
163 { | |
164 printf("- Wrote %u bytes to the udata.\n", (int)sz2); | |
165 } | |
166 } | |
167 | |
168 enum { | |
169 OPT_HELP_H, | |
170 OPT_HELP_QUESTION_MARK, | |
171 OPT_COPYRIGHT, | |
172 OPT_VERSION, | |
173 OPT_DESTDIR, | |
174 OPT_VERBOSE, | |
175 OPT_SMALL, | |
176 OPT_IGNORE_SISO_CHECK, | |
177 OPT_COUNT | |
178 }; | |
179 | |
180 static UOption options[]={ | |
181 UOPTION_HELP_H, | |
182 UOPTION_HELP_QUESTION_MARK, | |
183 UOPTION_COPYRIGHT, | |
184 UOPTION_VERSION, | |
185 UOPTION_DESTDIR, | |
186 UOPTION_VERBOSE, | |
187 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, | |
188 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 } | |
189 }; | |
190 | |
191 int main(int argc, char* argv[]) | |
192 { | |
193 ConvData data; | |
194 UErrorCode err = U_ZERO_ERROR, localError; | |
195 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
196 const char* destdir, *arg; | |
197 size_t destdirlen; | |
198 char* dot = NULL, *outBasename; | |
199 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
200 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
201 UVersionInfo icuVersion; | |
202 UBool printFilename; | |
203 | |
204 err = U_ZERO_ERROR; | |
205 | |
206 U_MAIN_INIT_ARGS(argc, argv); | |
207 | |
208 /* Set up the ICU version number */ | |
209 u_getVersion(icuVersion); | |
210 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); | |
211 | |
212 /* preset then read command line options */ | |
213 options[OPT_DESTDIR].value=u_getDataDirectory(); | |
214 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); | |
215 | |
216 /* error handling, printing usage message */ | |
217 if(argc<0) { | |
218 fprintf(stderr, | |
219 "error in command line argument \"%s\"\n", | |
220 argv[-argc]); | |
221 } else if(argc<2) { | |
222 argc=-1; | |
223 } | |
224 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK
].doesOccur) { | |
225 FILE *stdfile=argc<0 ? stderr : stdout; | |
226 fprintf(stdfile, | |
227 "usage: %s [-options] files...\n" | |
228 "\tread .ucm codepage mapping files and write .cnv files\n" | |
229 "options:\n" | |
230 "\t-h or -? or --help this usage text\n" | |
231 "\t-V or --version show a version message\n" | |
232 "\t-c or --copyright include a copyright notice\n" | |
233 "\t-d or --destdir destination directory, followed by the path\n
" | |
234 "\t-v or --verbose Turn on verbose output\n", | |
235 argv[0]); | |
236 fprintf(stdfile, | |
237 "\t --small Generate smaller .cnv files. They will be\n" | |
238 "\t significantly smaller but may not be compatib
le with\n" | |
239 "\t older versions of ICU and will require heap m
emory\n" | |
240 "\t allocation when loaded.\n" | |
241 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n
"); | |
242 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; | |
243 } | |
244 | |
245 if(options[OPT_VERSION].doesOccur) { | |
246 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping f
iles and write .cnv files\n", | |
247 dataInfo.formatVersion[0], dataInfo.formatVersion[1]); | |
248 printf("%s\n", U_COPYRIGHT_STRING); | |
249 exit(0); | |
250 } | |
251 | |
252 /* get the options values */ | |
253 haveCopyright = options[OPT_COPYRIGHT].doesOccur; | |
254 destdir = options[OPT_DESTDIR].value; | |
255 VERBOSE = options[OPT_VERBOSE].doesOccur; | |
256 SMALL = options[OPT_SMALL].doesOccur; | |
257 | |
258 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { | |
259 IGNORE_SISO_CHECK = TRUE; | |
260 } | |
261 | |
262 if (destdir != NULL && *destdir != 0) { | |
263 uprv_strcpy(outFileName, destdir); | |
264 destdirlen = uprv_strlen(destdir); | |
265 outBasename = outFileName + destdirlen; | |
266 if (*(outBasename - 1) != U_FILE_SEP_CHAR) { | |
267 *outBasename++ = U_FILE_SEP_CHAR; | |
268 ++destdirlen; | |
269 } | |
270 } else { | |
271 destdirlen = 0; | |
272 outBasename = outFileName; | |
273 } | |
274 | |
275 #if DEBUG | |
276 { | |
277 int i; | |
278 printf("makeconv: processing %d files...\n", argc - 1); | |
279 for(i=1; i<argc; ++i) { | |
280 printf("%s ", argv[i]); | |
281 } | |
282 printf("\n"); | |
283 fflush(stdout); | |
284 } | |
285 #endif | |
286 | |
287 err = U_ZERO_ERROR; | |
288 printFilename = (UBool) (argc > 2 || VERBOSE); | |
289 for (++argv; --argc; ++argv) | |
290 { | |
291 arg = getLongPathname(*argv); | |
292 | |
293 /* Check for potential buffer overflow */ | |
294 if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH) | |
295 { | |
296 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); | |
297 return U_BUFFER_OVERFLOW_ERROR; | |
298 } | |
299 | |
300 /*produces the right destination path for display*/ | |
301 if (destdirlen != 0) | |
302 { | |
303 const char *basename; | |
304 | |
305 /* find the last file sepator */ | |
306 basename = findBasename(arg); | |
307 uprv_strcpy(outBasename, basename); | |
308 } | |
309 else | |
310 { | |
311 uprv_strcpy(outFileName, arg); | |
312 } | |
313 | |
314 /*removes the extension if any is found*/ | |
315 dot = uprv_strrchr(outBasename, '.'); | |
316 if (dot) | |
317 { | |
318 *dot = '\0'; | |
319 } | |
320 | |
321 /* the basename without extension is the converter name */ | |
322 uprv_strcpy(cnvName, outBasename); | |
323 | |
324 /*Adds the target extension*/ | |
325 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); | |
326 | |
327 #if DEBUG | |
328 printf("makeconv: processing %s ...\n", arg); | |
329 fflush(stdout); | |
330 #endif | |
331 localError = U_ZERO_ERROR; | |
332 initConvData(&data); | |
333 createConverter(&data, arg, &localError); | |
334 | |
335 if (U_FAILURE(localError)) | |
336 { | |
337 /* if an error is found, print out an error msg and keep going */ | |
338 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\"
(%s)\n", outFileName, arg, | |
339 u_errorName(localError)); | |
340 if(U_SUCCESS(err)) { | |
341 err = localError; | |
342 } | |
343 } | |
344 else | |
345 { | |
346 /* Insure the static data name matches the file name */ | |
347 /* Changed to ignore directory and only compare base name | |
348 LDH 1/2/08*/ | |
349 char *p; | |
350 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator *
/ | |
351 | |
352 if(p == NULL) /* OK, try alternate */ | |
353 { | |
354 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); | |
355 if(p == NULL) | |
356 { | |
357 p=cnvName; /* If no separators, no problem */ | |
358 } | |
359 } | |
360 else | |
361 { | |
362 p++; /* If found separtor, don't include it in compare */ | |
363 } | |
364 if(uprv_stricmp(p,data.staticData.name)) | |
365 { | |
366 fprintf(stderr, "Warning: %s%s claims to be '%s'\n", | |
367 cnvName, CONVERTER_FILE_EXTENSION, | |
368 data.staticData.name); | |
369 } | |
370 | |
371 uprv_strcpy((char*)data.staticData.name, cnvName); | |
372 | |
373 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { | |
374 fprintf(stderr, | |
375 "Error: A converter name must contain only invariant charact
ers.\n" | |
376 "%s is not a valid converter name.\n", | |
377 data.staticData.name); | |
378 if(U_SUCCESS(err)) { | |
379 err = U_INVALID_TABLE_FORMAT; | |
380 } | |
381 } | |
382 | |
383 uprv_strcpy(cnvNameWithPkg, cnvName); | |
384 | |
385 localError = U_ZERO_ERROR; | |
386 writeConverterData(&data, cnvNameWithPkg, destdir, &localError); | |
387 | |
388 if(U_FAILURE(localError)) | |
389 { | |
390 /* if an error is found, print out an error msg and keep going*/ | |
391 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", o
utFileName, arg, | |
392 u_errorName(localError)); | |
393 if(U_SUCCESS(err)) { | |
394 err = localError; | |
395 } | |
396 } | |
397 else if (printFilename) | |
398 { | |
399 puts(outBasename); | |
400 } | |
401 } | |
402 fflush(stdout); | |
403 fflush(stderr); | |
404 | |
405 cleanupConvData(&data); | |
406 } | |
407 | |
408 return err; | |
409 } | |
410 | |
411 static void | |
412 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID
) { | |
413 if( (name[0]=='i' || name[0]=='I') && | |
414 (name[1]=='b' || name[1]=='B') && | |
415 (name[2]=='m' || name[2]=='M') | |
416 ) { | |
417 name+=3; | |
418 if(*name=='-') { | |
419 ++name; | |
420 } | |
421 *pPlatform=UCNV_IBM; | |
422 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); | |
423 } else { | |
424 *pPlatform=UCNV_UNKNOWN; | |
425 *pCCSID=0; | |
426 } | |
427 } | |
428 | |
429 static void | |
430 readHeader(ConvData *data, | |
431 FileStream* convFile, | |
432 const char* converterName, | |
433 UErrorCode *pErrorCode) { | |
434 char line[1024]; | |
435 char *s, *key, *value; | |
436 const UConverterStaticData *prototype; | |
437 UConverterStaticData *staticData; | |
438 | |
439 if(U_FAILURE(*pErrorCode)) { | |
440 return; | |
441 } | |
442 | |
443 staticData=&data->staticData; | |
444 staticData->platform=UCNV_IBM; | |
445 staticData->subCharLen=0; | |
446 | |
447 while(T_FileStream_readLine(convFile, line, sizeof(line))) { | |
448 /* basic parsing and handling of state-related items */ | |
449 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { | |
450 continue; | |
451 } | |
452 | |
453 /* stop at the beginning of the mapping section */ | |
454 if(uprv_strcmp(line, "CHARMAP")==0) { | |
455 break; | |
456 } | |
457 | |
458 /* collect the information from the header field, ignore unknown keys */ | |
459 if(uprv_strcmp(key, "code_set_name")==0) { | |
460 if(*value!=0) { | |
461 uprv_strcpy((char *)staticData->name, value); | |
462 getPlatformAndCCSIDFromName(value, &staticData->platform, &stati
cData->codepage); | |
463 } | |
464 } else if(uprv_strcmp(key, "subchar")==0) { | |
465 uint8_t bytes[UCNV_EXT_MAX_BYTES]; | |
466 int8_t length; | |
467 | |
468 s=value; | |
469 length=ucm_parseBytes(bytes, line, (const char **)&s); | |
470 if(1<=length && length<=4 && *s==0) { | |
471 staticData->subCharLen=length; | |
472 uprv_memcpy(staticData->subChar, bytes, length); | |
473 } else { | |
474 fprintf(stderr, "error: illegal <subchar> %s\n", value); | |
475 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
476 return; | |
477 } | |
478 } else if(uprv_strcmp(key, "subchar1")==0) { | |
479 uint8_t bytes[UCNV_EXT_MAX_BYTES]; | |
480 | |
481 s=value; | |
482 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { | |
483 staticData->subChar1=bytes[0]; | |
484 } else { | |
485 fprintf(stderr, "error: illegal <subchar1> %s\n", value); | |
486 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
487 return; | |
488 } | |
489 } | |
490 } | |
491 | |
492 /* copy values from the UCMFile to the static data */ | |
493 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; | |
494 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; | |
495 staticData->conversionType=data->ucm->states.conversionType; | |
496 | |
497 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { | |
498 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); | |
499 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
500 return; | |
501 } | |
502 | |
503 /* | |
504 * Now that we know the type, copy any 'default' values from the table. | |
505 * We need not check the type any further because the parser only | |
506 * recognizes what we have prototypes for. | |
507 * | |
508 * For delta (extension-only) tables, copy values from the base file | |
509 * instead, see createConverter(). | |
510 */ | |
511 if(data->ucm->baseName[0]==0) { | |
512 prototype=ucnv_converterStaticData[staticData->conversionType]; | |
513 if(prototype!=NULL) { | |
514 if(staticData->name[0]==0) { | |
515 uprv_strcpy((char *)staticData->name, prototype->name); | |
516 } | |
517 | |
518 if(staticData->codepage==0) { | |
519 staticData->codepage=prototype->codepage; | |
520 } | |
521 | |
522 if(staticData->platform==0) { | |
523 staticData->platform=prototype->platform; | |
524 } | |
525 | |
526 if(staticData->minBytesPerChar==0) { | |
527 staticData->minBytesPerChar=prototype->minBytesPerChar; | |
528 } | |
529 | |
530 if(staticData->maxBytesPerChar==0) { | |
531 staticData->maxBytesPerChar=prototype->maxBytesPerChar; | |
532 } | |
533 | |
534 if(staticData->subCharLen==0) { | |
535 staticData->subCharLen=prototype->subCharLen; | |
536 if(prototype->subCharLen>0) { | |
537 uprv_memcpy(staticData->subChar, prototype->subChar, prototy
pe->subCharLen); | |
538 } | |
539 } | |
540 } | |
541 } | |
542 | |
543 if(data->ucm->states.outputType<0) { | |
544 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; | |
545 } | |
546 | |
547 if( staticData->subChar1!=0 && | |
548 (staticData->minBytesPerChar>1 || | |
549 (staticData->conversionType!=UCNV_MBCS && | |
550 staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) | |
551 ) { | |
552 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or
EBCDIC_STATEFUL\n"); | |
553 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
554 } | |
555 } | |
556 | |
557 /* return TRUE if a base table was read, FALSE for an extension table */ | |
558 static UBool | |
559 readFile(ConvData *data, const char* converterName, | |
560 UErrorCode *pErrorCode) { | |
561 char line[1024]; | |
562 char *end; | |
563 FileStream *convFile; | |
564 | |
565 UCMStates *baseStates; | |
566 UBool dataIsBase; | |
567 | |
568 if(U_FAILURE(*pErrorCode)) { | |
569 return FALSE; | |
570 } | |
571 | |
572 data->ucm=ucm_open(); | |
573 | |
574 convFile=T_FileStream_open(converterName, "r"); | |
575 if(convFile==NULL) { | |
576 *pErrorCode=U_FILE_ACCESS_ERROR; | |
577 return FALSE; | |
578 } | |
579 | |
580 readHeader(data, convFile, converterName, pErrorCode); | |
581 if(U_FAILURE(*pErrorCode)) { | |
582 return FALSE; | |
583 } | |
584 | |
585 if(data->ucm->baseName[0]==0) { | |
586 dataIsBase=TRUE; | |
587 baseStates=&data->ucm->states; | |
588 ucm_processStates(baseStates, IGNORE_SISO_CHECK); | |
589 } else { | |
590 dataIsBase=FALSE; | |
591 baseStates=NULL; | |
592 } | |
593 | |
594 /* read the base table */ | |
595 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); | |
596 if(U_FAILURE(*pErrorCode)) { | |
597 return FALSE; | |
598 } | |
599 | |
600 /* read an extension table if there is one */ | |
601 while(T_FileStream_readLine(convFile, line, sizeof(line))) { | |
602 end=uprv_strchr(line, 0); | |
603 while(line<end && | |
604 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\
t')) { | |
605 --end; | |
606 } | |
607 *end=0; | |
608 | |
609 if(line[0]=='#' || u_skipWhitespace(line)==end) { | |
610 continue; /* ignore empty and comment lines */ | |
611 } | |
612 | |
613 if(0==uprv_strcmp(line, "CHARMAP")) { | |
614 /* read the extension table */ | |
615 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode); | |
616 } else { | |
617 fprintf(stderr, "unexpected text after the base mapping table\n"); | |
618 } | |
619 break; | |
620 } | |
621 | |
622 T_FileStream_close(convFile); | |
623 | |
624 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType=
=UCM_FLAGS_MIXED) { | |
625 fprintf(stderr, "error: some entries have the mapping precision (with '|
'), some do not\n"); | |
626 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
627 } | |
628 | |
629 return dataIsBase; | |
630 } | |
631 | |
632 static void | |
633 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod
e) { | |
634 ConvData baseData; | |
635 UBool dataIsBase; | |
636 | |
637 UConverterStaticData *staticData; | |
638 UCMStates *states, *baseStates; | |
639 | |
640 if(U_FAILURE(*pErrorCode)) { | |
641 return; | |
642 } | |
643 | |
644 initConvData(data); | |
645 | |
646 dataIsBase=readFile(data, converterName, pErrorCode); | |
647 if(U_FAILURE(*pErrorCode)) { | |
648 return; | |
649 } | |
650 | |
651 staticData=&data->staticData; | |
652 states=&data->ucm->states; | |
653 | |
654 if(dataIsBase) { | |
655 /* | |
656 * Build a normal .cnv file with a base table | |
657 * and an optional extension table. | |
658 */ | |
659 data->cnvData=MBCSOpen(data->ucm); | |
660 if(data->cnvData==NULL) { | |
661 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
662 | |
663 } else if(!data->cnvData->isValid(data->cnvData, | |
664 staticData->subChar, staticData->subCharLen) | |
665 ) { | |
666 fprintf(stderr, " the substitution character byte sequence is
illegal in this codepage structure!\n"); | |
667 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
668 | |
669 } else if(staticData->subChar1!=0 && | |
670 !data->cnvData->isValid(data->cnvData, &staticData->subChar1
, 1) | |
671 ) { | |
672 fprintf(stderr, " the subchar1 byte is illegal in this codepag
e structure!\n"); | |
673 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
674 | |
675 } else if( | |
676 data->ucm->ext->mappingsLength>0 && | |
677 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm
->ext, FALSE) | |
678 ) { | |
679 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
680 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { | |
681 /* sort the table so that it can be turned into UTF-8-friendly data
*/ | |
682 ucm_sortTable(data->ucm->base); | |
683 } | |
684 | |
685 if(U_SUCCESS(*pErrorCode)) { | |
686 if( | |
687 /* add the base table after ucm_checkBaseExt()! */ | |
688 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->
staticData) | |
689 ) { | |
690 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
691 } else { | |
692 /* | |
693 * addTable() may have requested moving more mappings to the ext
ension table | |
694 * if they fit into the base toUnicode table but not into the | |
695 * base fromUnicode table. | |
696 * (Especially for UTF-8-friendly fromUnicode tables.) | |
697 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which c
auses them | |
698 * to be excluded from the extension toUnicode data. | |
699 * See MBCSOkForBaseFromUnicode() for which mappings do not fit
into | |
700 * the base fromUnicode table. | |
701 */ | |
702 ucm_moveMappings(data->ucm->base, data->ucm->ext); | |
703 ucm_sortTable(data->ucm->ext); | |
704 if(data->ucm->ext->mappingsLength>0) { | |
705 /* prepare the extension table, if there is one */ | |
706 data->extData=CnvExtOpen(data->ucm); | |
707 if(data->extData==NULL) { | |
708 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
709 } else if( | |
710 !data->extData->addTable(data->extData, data->ucm->ext,
&data->staticData) | |
711 ) { | |
712 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
713 } | |
714 } | |
715 } | |
716 } | |
717 } else { | |
718 /* Build an extension-only .cnv file. */ | |
719 char baseFilename[500]; | |
720 char *basename; | |
721 | |
722 initConvData(&baseData); | |
723 | |
724 /* assemble a path/filename for data->ucm->baseName */ | |
725 uprv_strcpy(baseFilename, converterName); | |
726 basename=(char *)findBasename(baseFilename); | |
727 uprv_strcpy(basename, data->ucm->baseName); | |
728 uprv_strcat(basename, ".ucm"); | |
729 | |
730 /* read the base table */ | |
731 dataIsBase=readFile(&baseData, baseFilename, pErrorCode); | |
732 if(U_FAILURE(*pErrorCode)) { | |
733 return; | |
734 } else if(!dataIsBase) { | |
735 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base tab
le file\n", baseFilename); | |
736 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
737 } else { | |
738 /* prepare the extension table */ | |
739 data->extData=CnvExtOpen(data->ucm); | |
740 if(data->extData==NULL) { | |
741 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
742 } else { | |
743 /* fill in gaps in extension file header fields */ | |
744 UCMapping *m, *mLimit; | |
745 uint8_t fallbackFlags; | |
746 | |
747 baseStates=&baseData.ucm->states; | |
748 if(states->conversionType==UCNV_DBCS) { | |
749 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2
); | |
750 } else if(states->minCharLength==0) { | |
751 staticData->minBytesPerChar=(int8_t)(states->minCharLength=b
aseStates->minCharLength); | |
752 } | |
753 if(states->maxCharLength<states->minCharLength) { | |
754 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=b
aseStates->maxCharLength); | |
755 } | |
756 | |
757 if(staticData->subCharLen==0) { | |
758 uprv_memcpy(staticData->subChar, baseData.staticData.subChar
, 4); | |
759 staticData->subCharLen=baseData.staticData.subCharLen; | |
760 } | |
761 /* | |
762 * do not copy subChar1 - | |
763 * only use what is explicitly specified | |
764 * because it cannot be unset in the extension file header | |
765 */ | |
766 | |
767 /* get the fallback flags */ | |
768 fallbackFlags=0; | |
769 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base-
>mappingsLength; | |
770 m<mLimit && fallbackFlags!=3; | |
771 ++m | |
772 ) { | |
773 if(m->f==1) { | |
774 fallbackFlags|=1; | |
775 } else if(m->f==3) { | |
776 fallbackFlags|=2; | |
777 } | |
778 } | |
779 | |
780 if(fallbackFlags&1) { | |
781 staticData->hasFromUnicodeFallback=TRUE; | |
782 } | |
783 if(fallbackFlags&2) { | |
784 staticData->hasToUnicodeFallback=TRUE; | |
785 } | |
786 | |
787 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData
->subCharLen)) { | |
788 fprintf(stderr, " the substitution character byte sequ
ence is illegal in this codepage structure!\n"); | |
789 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
790 | |
791 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseState
s, &staticData->subChar1, 1)) { | |
792 fprintf(stderr, " the subchar1 byte is illegal in this
codepage structure!\n"); | |
793 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
794 | |
795 } else if( | |
796 !ucm_checkValidity(data->ucm->ext, baseStates) || | |
797 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm-
>ext, data->ucm->ext, FALSE) | |
798 ) { | |
799 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
800 } else { | |
801 if(states->maxCharLength>1) { | |
802 /* | |
803 * When building a normal .cnv file with a base table | |
804 * for an MBCS (not SBCS) table with explicit precision
flags, | |
805 * the MBCSAddTable() function marks some mappings for m
oving | |
806 * to the extension table. | |
807 * They fit into the base toUnicode table but not into t
he | |
808 * base fromUnicode table. | |
809 * (Note: We do have explicit precision flags because th
ey are | |
810 * required for extension table generation, and | |
811 * ucm_checkBaseExt() verified it.) | |
812 * | |
813 * We do not call MBCSAddTable() here (we probably could
) | |
814 * so we need to do the analysis before building the ext
ension table. | |
815 * We assume that MBCSAddTable() will build a UTF-8-frie
ndly table. | |
816 * Redundant mappings in the extension table are ok exce
pt they cost some size. | |
817 * | |
818 * Do this after ucm_checkBaseExt(). | |
819 */ | |
820 const MBCSData *mbcsData=MBCSGetDummy(); | |
821 int32_t needsMove=0; | |
822 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.uc
m->base->mappingsLength; | |
823 m<mLimit; | |
824 ++m | |
825 ) { | |
826 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m
->bLen, m->u, m->f)) { | |
827 m->f|=MBCS_FROM_U_EXT_FLAG; | |
828 m->moveFlag=UCM_MOVE_TO_EXT; | |
829 ++needsMove; | |
830 } | |
831 } | |
832 | |
833 if(needsMove!=0) { | |
834 ucm_moveMappings(baseData.ucm->base, data->ucm->ext)
; | |
835 ucm_sortTable(data->ucm->ext); | |
836 } | |
837 } | |
838 if(!data->extData->addTable(data->extData, data->ucm->ext, &
data->staticData)) { | |
839 *pErrorCode=U_INVALID_TABLE_FORMAT; | |
840 } | |
841 } | |
842 } | |
843 } | |
844 | |
845 cleanupConvData(&baseData); | |
846 } | |
847 } | |
848 | |
849 /* | |
850 * Hey, Emacs, please set the following: | |
851 * | |
852 * Local Variables: | |
853 * indent-tabs-mode: nil | |
854 * End: | |
855 * | |
856 */ | |
OLD | NEW |