Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(601)

Side by Side Diff: source/tools/makeconv/makeconv.c

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/tools/icuswap/icuswap.vcxproj ('k') | source/tools/makeconv/makeconv.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 ******************************************************************************* *
3 *
4 * Copyright (C) 1998-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************* *
8 *
9 *
10 * makeconv.c:
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
13 *
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 */
17
18 #include <stdio.h>
19 #include "unicode/putil.h"
20 #include "unicode/ucnv_err.h"
21 #include "ucnv_bld.h"
22 #include "ucnv_imp.h"
23 #include "ucnv_cnv.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "uinvchar.h"
27 #include "filestrm.h"
28 #include "toolutil.h"
29 #include "uoptions.h"
30 #include "unicode/udata.h"
31 #include "unewdata.h"
32 #include "uparse.h"
33 #include "ucm.h"
34 #include "makeconv.h"
35 #include "genmbcs.h"
36
37 #define DEBUG 0
38
39 typedef struct ConvData {
40 UCMFile *ucm;
41 NewConverter *cnvData, *extData;
42 UConverterSharedData sharedData;
43 UConverterStaticData staticData;
44 } ConvData;
45
46 static void
47 initConvData(ConvData *data) {
48 uprv_memset(data, 0, sizeof(ConvData));
49 data->sharedData.structSize=sizeof(UConverterSharedData);
50 data->staticData.structSize=sizeof(UConverterStaticData);
51 data->sharedData.staticData=&data->staticData;
52 }
53
54 static void
55 cleanupConvData(ConvData *data) {
56 if(data!=NULL) {
57 if(data->cnvData!=NULL) {
58 data->cnvData->close(data->cnvData);
59 data->cnvData=NULL;
60 }
61 if(data->extData!=NULL) {
62 data->extData->close(data->extData);
63 data->extData=NULL;
64 }
65 ucm_close(data->ucm);
66 data->ucm=NULL;
67 }
68 }
69
70 /*
71 * from ucnvstat.c - static prototypes of data-based converters
72 */
73 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPP ORTED_CONVERTER_TYPES];
74
75 /*
76 * Global - verbosity
77 */
78 UBool VERBOSE = FALSE;
79 UBool SMALL = FALSE;
80 UBool IGNORE_SISO_CHECK = FALSE;
81
82 static void
83 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCod e);
84
85 /*
86 * Set up the UNewData and write the converter..
87 */
88 static void
89 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr orCode *status);
90
91 UBool haveCopyright=TRUE;
92
93 static UDataInfo dataInfo={
94 sizeof(UDataInfo),
95 0,
96
97 U_IS_BIG_ENDIAN,
98 U_CHARSET_FAMILY,
99 sizeof(UChar),
100 0,
101
102 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
103 {6, 2, 0, 0}, /* formatVersion */
104 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
105 };
106
107 static void
108 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr orCode *status)
109 {
110 UNewDataMemory *mem = NULL;
111 uint32_t sz2;
112 uint32_t size = 0;
113 int32_t tableType;
114
115 if(U_FAILURE(*status))
116 {
117 return;
118 }
119
120 tableType=TABLE_NONE;
121 if(data->cnvData!=NULL) {
122 tableType|=TABLE_BASE;
123 }
124 if(data->extData!=NULL) {
125 tableType|=TABLE_EXT;
126 }
127
128 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPY RIGHT_STRING : NULL, status);
129
130 if(U_FAILURE(*status))
131 {
132 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
133 cnvName,
134 "cnv",
135 u_errorName(*status));
136 return;
137 }
138
139 if(VERBOSE)
140 {
141 printf("- Opened udata %s.%s\n", cnvName, "cnv");
142 }
143
144
145 /* all read only, clean, platform independent data. Mmmm. :) */
146 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
147 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
148 /* Now, write the table */
149 if(tableType&TABLE_BASE) {
150 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tabl eType);
151 }
152 if(tableType&TABLE_EXT) {
153 size += data->extData->write(data->extData, &data->staticData, mem, tabl eType);
154 }
155
156 sz2 = udata_finish(mem, status);
157 if(size != sz2)
158 {
159 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u b ytes\n", (int)sz2, (int)size);
160 *status=U_INTERNAL_PROGRAM_ERROR;
161 }
162 if(VERBOSE)
163 {
164 printf("- Wrote %u bytes to the udata.\n", (int)sz2);
165 }
166 }
167
/* Indexes into the options[] array; main() reads options[OPT_xxx]. */
enum {
    OPT_HELP_H,             /* UOPTION_HELP_H */
    OPT_HELP_QUESTION_MARK, /* UOPTION_HELP_QUESTION_MARK */
    OPT_COPYRIGHT,          /* UOPTION_COPYRIGHT */
    OPT_VERSION,            /* UOPTION_VERSION */
    OPT_DESTDIR,            /* UOPTION_DESTDIR */
    OPT_VERBOSE,            /* UOPTION_VERBOSE */
    OPT_SMALL,              /* --small */
    OPT_IGNORE_SISO_CHECK,  /* --ignore-siso-check */
    OPT_COUNT               /* number of options */
};
179
180 static UOption options[]={
181 UOPTION_HELP_H,
182 UOPTION_HELP_QUESTION_MARK,
183 UOPTION_COPYRIGHT,
184 UOPTION_VERSION,
185 UOPTION_DESTDIR,
186 UOPTION_VERBOSE,
187 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
188 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
189 };
190
191 int main(int argc, char* argv[])
192 {
193 ConvData data;
194 UErrorCode err = U_ZERO_ERROR, localError;
195 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
196 const char* destdir, *arg;
197 size_t destdirlen;
198 char* dot = NULL, *outBasename;
199 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
200 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
201 UVersionInfo icuVersion;
202 UBool printFilename;
203
204 err = U_ZERO_ERROR;
205
206 U_MAIN_INIT_ARGS(argc, argv);
207
208 /* Set up the ICU version number */
209 u_getVersion(icuVersion);
210 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
211
212 /* preset then read command line options */
213 options[OPT_DESTDIR].value=u_getDataDirectory();
214 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
215
216 /* error handling, printing usage message */
217 if(argc<0) {
218 fprintf(stderr,
219 "error in command line argument \"%s\"\n",
220 argv[-argc]);
221 } else if(argc<2) {
222 argc=-1;
223 }
224 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK ].doesOccur) {
225 FILE *stdfile=argc<0 ? stderr : stdout;
226 fprintf(stdfile,
227 "usage: %s [-options] files...\n"
228 "\tread .ucm codepage mapping files and write .cnv files\n"
229 "options:\n"
230 "\t-h or -? or --help this usage text\n"
231 "\t-V or --version show a version message\n"
232 "\t-c or --copyright include a copyright notice\n"
233 "\t-d or --destdir destination directory, followed by the path\n "
234 "\t-v or --verbose Turn on verbose output\n",
235 argv[0]);
236 fprintf(stdfile,
237 "\t --small Generate smaller .cnv files. They will be\n"
238 "\t significantly smaller but may not be compatib le with\n"
239 "\t older versions of ICU and will require heap m emory\n"
240 "\t allocation when loaded.\n"
241 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n ");
242 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
243 }
244
245 if(options[OPT_VERSION].doesOccur) {
246 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping f iles and write .cnv files\n",
247 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
248 printf("%s\n", U_COPYRIGHT_STRING);
249 exit(0);
250 }
251
252 /* get the options values */
253 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
254 destdir = options[OPT_DESTDIR].value;
255 VERBOSE = options[OPT_VERBOSE].doesOccur;
256 SMALL = options[OPT_SMALL].doesOccur;
257
258 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
259 IGNORE_SISO_CHECK = TRUE;
260 }
261
262 if (destdir != NULL && *destdir != 0) {
263 uprv_strcpy(outFileName, destdir);
264 destdirlen = uprv_strlen(destdir);
265 outBasename = outFileName + destdirlen;
266 if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
267 *outBasename++ = U_FILE_SEP_CHAR;
268 ++destdirlen;
269 }
270 } else {
271 destdirlen = 0;
272 outBasename = outFileName;
273 }
274
275 #if DEBUG
276 {
277 int i;
278 printf("makeconv: processing %d files...\n", argc - 1);
279 for(i=1; i<argc; ++i) {
280 printf("%s ", argv[i]);
281 }
282 printf("\n");
283 fflush(stdout);
284 }
285 #endif
286
287 err = U_ZERO_ERROR;
288 printFilename = (UBool) (argc > 2 || VERBOSE);
289 for (++argv; --argc; ++argv)
290 {
291 arg = getLongPathname(*argv);
292
293 /* Check for potential buffer overflow */
294 if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
295 {
296 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
297 return U_BUFFER_OVERFLOW_ERROR;
298 }
299
300 /*produces the right destination path for display*/
301 if (destdirlen != 0)
302 {
303 const char *basename;
304
305 /* find the last file sepator */
306 basename = findBasename(arg);
307 uprv_strcpy(outBasename, basename);
308 }
309 else
310 {
311 uprv_strcpy(outFileName, arg);
312 }
313
314 /*removes the extension if any is found*/
315 dot = uprv_strrchr(outBasename, '.');
316 if (dot)
317 {
318 *dot = '\0';
319 }
320
321 /* the basename without extension is the converter name */
322 uprv_strcpy(cnvName, outBasename);
323
324 /*Adds the target extension*/
325 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
326
327 #if DEBUG
328 printf("makeconv: processing %s ...\n", arg);
329 fflush(stdout);
330 #endif
331 localError = U_ZERO_ERROR;
332 initConvData(&data);
333 createConverter(&data, arg, &localError);
334
335 if (U_FAILURE(localError))
336 {
337 /* if an error is found, print out an error msg and keep going */
338 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
339 u_errorName(localError));
340 if(U_SUCCESS(err)) {
341 err = localError;
342 }
343 }
344 else
345 {
346 /* Insure the static data name matches the file name */
347 /* Changed to ignore directory and only compare base name
348 LDH 1/2/08*/
349 char *p;
350 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator * /
351
352 if(p == NULL) /* OK, try alternate */
353 {
354 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
355 if(p == NULL)
356 {
357 p=cnvName; /* If no separators, no problem */
358 }
359 }
360 else
361 {
362 p++; /* If found separtor, don't include it in compare */
363 }
364 if(uprv_stricmp(p,data.staticData.name))
365 {
366 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
367 cnvName, CONVERTER_FILE_EXTENSION,
368 data.staticData.name);
369 }
370
371 uprv_strcpy((char*)data.staticData.name, cnvName);
372
373 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
374 fprintf(stderr,
375 "Error: A converter name must contain only invariant charact ers.\n"
376 "%s is not a valid converter name.\n",
377 data.staticData.name);
378 if(U_SUCCESS(err)) {
379 err = U_INVALID_TABLE_FORMAT;
380 }
381 }
382
383 uprv_strcpy(cnvNameWithPkg, cnvName);
384
385 localError = U_ZERO_ERROR;
386 writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
387
388 if(U_FAILURE(localError))
389 {
390 /* if an error is found, print out an error msg and keep going*/
391 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", o utFileName, arg,
392 u_errorName(localError));
393 if(U_SUCCESS(err)) {
394 err = localError;
395 }
396 }
397 else if (printFilename)
398 {
399 puts(outBasename);
400 }
401 }
402 fflush(stdout);
403 fflush(stderr);
404
405 cleanupConvData(&data);
406 }
407
408 return err;
409 }
410
411 static void
412 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID ) {
413 if( (name[0]=='i' || name[0]=='I') &&
414 (name[1]=='b' || name[1]=='B') &&
415 (name[2]=='m' || name[2]=='M')
416 ) {
417 name+=3;
418 if(*name=='-') {
419 ++name;
420 }
421 *pPlatform=UCNV_IBM;
422 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
423 } else {
424 *pPlatform=UCNV_UNKNOWN;
425 *pCCSID=0;
426 }
427 }
428
429 static void
430 readHeader(ConvData *data,
431 FileStream* convFile,
432 const char* converterName,
433 UErrorCode *pErrorCode) {
434 char line[1024];
435 char *s, *key, *value;
436 const UConverterStaticData *prototype;
437 UConverterStaticData *staticData;
438
439 if(U_FAILURE(*pErrorCode)) {
440 return;
441 }
442
443 staticData=&data->staticData;
444 staticData->platform=UCNV_IBM;
445 staticData->subCharLen=0;
446
447 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
448 /* basic parsing and handling of state-related items */
449 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
450 continue;
451 }
452
453 /* stop at the beginning of the mapping section */
454 if(uprv_strcmp(line, "CHARMAP")==0) {
455 break;
456 }
457
458 /* collect the information from the header field, ignore unknown keys */
459 if(uprv_strcmp(key, "code_set_name")==0) {
460 if(*value!=0) {
461 uprv_strcpy((char *)staticData->name, value);
462 getPlatformAndCCSIDFromName(value, &staticData->platform, &stati cData->codepage);
463 }
464 } else if(uprv_strcmp(key, "subchar")==0) {
465 uint8_t bytes[UCNV_EXT_MAX_BYTES];
466 int8_t length;
467
468 s=value;
469 length=ucm_parseBytes(bytes, line, (const char **)&s);
470 if(1<=length && length<=4 && *s==0) {
471 staticData->subCharLen=length;
472 uprv_memcpy(staticData->subChar, bytes, length);
473 } else {
474 fprintf(stderr, "error: illegal <subchar> %s\n", value);
475 *pErrorCode=U_INVALID_TABLE_FORMAT;
476 return;
477 }
478 } else if(uprv_strcmp(key, "subchar1")==0) {
479 uint8_t bytes[UCNV_EXT_MAX_BYTES];
480
481 s=value;
482 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
483 staticData->subChar1=bytes[0];
484 } else {
485 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
486 *pErrorCode=U_INVALID_TABLE_FORMAT;
487 return;
488 }
489 }
490 }
491
492 /* copy values from the UCMFile to the static data */
493 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
494 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
495 staticData->conversionType=data->ucm->states.conversionType;
496
497 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
498 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
499 *pErrorCode=U_INVALID_TABLE_FORMAT;
500 return;
501 }
502
503 /*
504 * Now that we know the type, copy any 'default' values from the table.
505 * We need not check the type any further because the parser only
506 * recognizes what we have prototypes for.
507 *
508 * For delta (extension-only) tables, copy values from the base file
509 * instead, see createConverter().
510 */
511 if(data->ucm->baseName[0]==0) {
512 prototype=ucnv_converterStaticData[staticData->conversionType];
513 if(prototype!=NULL) {
514 if(staticData->name[0]==0) {
515 uprv_strcpy((char *)staticData->name, prototype->name);
516 }
517
518 if(staticData->codepage==0) {
519 staticData->codepage=prototype->codepage;
520 }
521
522 if(staticData->platform==0) {
523 staticData->platform=prototype->platform;
524 }
525
526 if(staticData->minBytesPerChar==0) {
527 staticData->minBytesPerChar=prototype->minBytesPerChar;
528 }
529
530 if(staticData->maxBytesPerChar==0) {
531 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
532 }
533
534 if(staticData->subCharLen==0) {
535 staticData->subCharLen=prototype->subCharLen;
536 if(prototype->subCharLen>0) {
537 uprv_memcpy(staticData->subChar, prototype->subChar, prototy pe->subCharLen);
538 }
539 }
540 }
541 }
542
543 if(data->ucm->states.outputType<0) {
544 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
545 }
546
547 if( staticData->subChar1!=0 &&
548 (staticData->minBytesPerChar>1 ||
549 (staticData->conversionType!=UCNV_MBCS &&
550 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
551 ) {
552 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
553 *pErrorCode=U_INVALID_TABLE_FORMAT;
554 }
555 }
556
557 /* return TRUE if a base table was read, FALSE for an extension table */
558 static UBool
559 readFile(ConvData *data, const char* converterName,
560 UErrorCode *pErrorCode) {
561 char line[1024];
562 char *end;
563 FileStream *convFile;
564
565 UCMStates *baseStates;
566 UBool dataIsBase;
567
568 if(U_FAILURE(*pErrorCode)) {
569 return FALSE;
570 }
571
572 data->ucm=ucm_open();
573
574 convFile=T_FileStream_open(converterName, "r");
575 if(convFile==NULL) {
576 *pErrorCode=U_FILE_ACCESS_ERROR;
577 return FALSE;
578 }
579
580 readHeader(data, convFile, converterName, pErrorCode);
581 if(U_FAILURE(*pErrorCode)) {
582 return FALSE;
583 }
584
585 if(data->ucm->baseName[0]==0) {
586 dataIsBase=TRUE;
587 baseStates=&data->ucm->states;
588 ucm_processStates(baseStates, IGNORE_SISO_CHECK);
589 } else {
590 dataIsBase=FALSE;
591 baseStates=NULL;
592 }
593
594 /* read the base table */
595 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
596 if(U_FAILURE(*pErrorCode)) {
597 return FALSE;
598 }
599
600 /* read an extension table if there is one */
601 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
602 end=uprv_strchr(line, 0);
603 while(line<end &&
604 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\ t')) {
605 --end;
606 }
607 *end=0;
608
609 if(line[0]=='#' || u_skipWhitespace(line)==end) {
610 continue; /* ignore empty and comment lines */
611 }
612
613 if(0==uprv_strcmp(line, "CHARMAP")) {
614 /* read the extension table */
615 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
616 } else {
617 fprintf(stderr, "unexpected text after the base mapping table\n");
618 }
619 break;
620 }
621
622 T_FileStream_close(convFile);
623
624 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType= =UCM_FLAGS_MIXED) {
625 fprintf(stderr, "error: some entries have the mapping precision (with '| '), some do not\n");
626 *pErrorCode=U_INVALID_TABLE_FORMAT;
627 }
628
629 return dataIsBase;
630 }
631
632 static void
633 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod e) {
634 ConvData baseData;
635 UBool dataIsBase;
636
637 UConverterStaticData *staticData;
638 UCMStates *states, *baseStates;
639
640 if(U_FAILURE(*pErrorCode)) {
641 return;
642 }
643
644 initConvData(data);
645
646 dataIsBase=readFile(data, converterName, pErrorCode);
647 if(U_FAILURE(*pErrorCode)) {
648 return;
649 }
650
651 staticData=&data->staticData;
652 states=&data->ucm->states;
653
654 if(dataIsBase) {
655 /*
656 * Build a normal .cnv file with a base table
657 * and an optional extension table.
658 */
659 data->cnvData=MBCSOpen(data->ucm);
660 if(data->cnvData==NULL) {
661 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
662
663 } else if(!data->cnvData->isValid(data->cnvData,
664 staticData->subChar, staticData->subCharLen)
665 ) {
666 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
667 *pErrorCode=U_INVALID_TABLE_FORMAT;
668
669 } else if(staticData->subChar1!=0 &&
670 !data->cnvData->isValid(data->cnvData, &staticData->subChar1 , 1)
671 ) {
672 fprintf(stderr, " the subchar1 byte is illegal in this codepag e structure!\n");
673 *pErrorCode=U_INVALID_TABLE_FORMAT;
674
675 } else if(
676 data->ucm->ext->mappingsLength>0 &&
677 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm ->ext, FALSE)
678 ) {
679 *pErrorCode=U_INVALID_TABLE_FORMAT;
680 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
681 /* sort the table so that it can be turned into UTF-8-friendly data */
682 ucm_sortTable(data->ucm->base);
683 }
684
685 if(U_SUCCESS(*pErrorCode)) {
686 if(
687 /* add the base table after ucm_checkBaseExt()! */
688 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data-> staticData)
689 ) {
690 *pErrorCode=U_INVALID_TABLE_FORMAT;
691 } else {
692 /*
693 * addTable() may have requested moving more mappings to the ext ension table
694 * if they fit into the base toUnicode table but not into the
695 * base fromUnicode table.
696 * (Especially for UTF-8-friendly fromUnicode tables.)
697 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which c auses them
698 * to be excluded from the extension toUnicode data.
699 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
700 * the base fromUnicode table.
701 */
702 ucm_moveMappings(data->ucm->base, data->ucm->ext);
703 ucm_sortTable(data->ucm->ext);
704 if(data->ucm->ext->mappingsLength>0) {
705 /* prepare the extension table, if there is one */
706 data->extData=CnvExtOpen(data->ucm);
707 if(data->extData==NULL) {
708 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
709 } else if(
710 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
711 ) {
712 *pErrorCode=U_INVALID_TABLE_FORMAT;
713 }
714 }
715 }
716 }
717 } else {
718 /* Build an extension-only .cnv file. */
719 char baseFilename[500];
720 char *basename;
721
722 initConvData(&baseData);
723
724 /* assemble a path/filename for data->ucm->baseName */
725 uprv_strcpy(baseFilename, converterName);
726 basename=(char *)findBasename(baseFilename);
727 uprv_strcpy(basename, data->ucm->baseName);
728 uprv_strcat(basename, ".ucm");
729
730 /* read the base table */
731 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
732 if(U_FAILURE(*pErrorCode)) {
733 return;
734 } else if(!dataIsBase) {
735 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base tab le file\n", baseFilename);
736 *pErrorCode=U_INVALID_TABLE_FORMAT;
737 } else {
738 /* prepare the extension table */
739 data->extData=CnvExtOpen(data->ucm);
740 if(data->extData==NULL) {
741 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
742 } else {
743 /* fill in gaps in extension file header fields */
744 UCMapping *m, *mLimit;
745 uint8_t fallbackFlags;
746
747 baseStates=&baseData.ucm->states;
748 if(states->conversionType==UCNV_DBCS) {
749 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2 );
750 } else if(states->minCharLength==0) {
751 staticData->minBytesPerChar=(int8_t)(states->minCharLength=b aseStates->minCharLength);
752 }
753 if(states->maxCharLength<states->minCharLength) {
754 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=b aseStates->maxCharLength);
755 }
756
757 if(staticData->subCharLen==0) {
758 uprv_memcpy(staticData->subChar, baseData.staticData.subChar , 4);
759 staticData->subCharLen=baseData.staticData.subCharLen;
760 }
761 /*
762 * do not copy subChar1 -
763 * only use what is explicitly specified
764 * because it cannot be unset in the extension file header
765 */
766
767 /* get the fallback flags */
768 fallbackFlags=0;
769 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base- >mappingsLength;
770 m<mLimit && fallbackFlags!=3;
771 ++m
772 ) {
773 if(m->f==1) {
774 fallbackFlags|=1;
775 } else if(m->f==3) {
776 fallbackFlags|=2;
777 }
778 }
779
780 if(fallbackFlags&1) {
781 staticData->hasFromUnicodeFallback=TRUE;
782 }
783 if(fallbackFlags&2) {
784 staticData->hasToUnicodeFallback=TRUE;
785 }
786
787 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData ->subCharLen)) {
788 fprintf(stderr, " the substitution character byte sequ ence is illegal in this codepage structure!\n");
789 *pErrorCode=U_INVALID_TABLE_FORMAT;
790
791 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseState s, &staticData->subChar1, 1)) {
792 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
793 *pErrorCode=U_INVALID_TABLE_FORMAT;
794
795 } else if(
796 !ucm_checkValidity(data->ucm->ext, baseStates) ||
797 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm- >ext, data->ucm->ext, FALSE)
798 ) {
799 *pErrorCode=U_INVALID_TABLE_FORMAT;
800 } else {
801 if(states->maxCharLength>1) {
802 /*
803 * When building a normal .cnv file with a base table
804 * for an MBCS (not SBCS) table with explicit precision flags,
805 * the MBCSAddTable() function marks some mappings for m oving
806 * to the extension table.
807 * They fit into the base toUnicode table but not into t he
808 * base fromUnicode table.
809 * (Note: We do have explicit precision flags because th ey are
810 * required for extension table generation, and
811 * ucm_checkBaseExt() verified it.)
812 *
813 * We do not call MBCSAddTable() here (we probably could )
814 * so we need to do the analysis before building the ext ension table.
815 * We assume that MBCSAddTable() will build a UTF-8-frie ndly table.
816 * Redundant mappings in the extension table are ok exce pt they cost some size.
817 *
818 * Do this after ucm_checkBaseExt().
819 */
820 const MBCSData *mbcsData=MBCSGetDummy();
821 int32_t needsMove=0;
822 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.uc m->base->mappingsLength;
823 m<mLimit;
824 ++m
825 ) {
826 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m ->bLen, m->u, m->f)) {
827 m->f|=MBCS_FROM_U_EXT_FLAG;
828 m->moveFlag=UCM_MOVE_TO_EXT;
829 ++needsMove;
830 }
831 }
832
833 if(needsMove!=0) {
834 ucm_moveMappings(baseData.ucm->base, data->ucm->ext) ;
835 ucm_sortTable(data->ucm->ext);
836 }
837 }
838 if(!data->extData->addTable(data->extData, data->ucm->ext, & data->staticData)) {
839 *pErrorCode=U_INVALID_TABLE_FORMAT;
840 }
841 }
842 }
843 }
844
845 cleanupConvData(&baseData);
846 }
847 }
848
849 /*
850 * Hey, Emacs, please set the following:
851 *
852 * Local Variables:
853 * indent-tabs-mode: nil
854 * End:
855 *
856 */
OLDNEW
« no previous file with comments | « source/tools/icuswap/icuswap.vcxproj ('k') | source/tools/makeconv/makeconv.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698