OLD | NEW |
(Empty) | |
| 1 /****************************************************************************** |
| 2 * Copyright (C) 2008-2010, International Business Machines |
| 3 * Corporation and others. All Rights Reserved. |
| 4 ******************************************************************************* |
| 5 */ |
| 6 #include "unicode/utypes.h" |
| 7 |
| 8 #include <stdio.h> |
| 9 #include <stdlib.h> |
| 10 #include "unicode/utypes.h" |
| 11 #include "unicode/putil.h" |
| 12 #include "cmemory.h" |
| 13 #include "cstring.h" |
| 14 #include "filestrm.h" |
| 15 #include "toolutil.h" |
| 16 #include "unicode/uclean.h" |
| 17 #include "unewdata.h" |
| 18 #include "putilimp.h" |
| 19 #include "pkg_gencmn.h" |
| 20 |
| 21 #define STRING_STORE_SIZE 100000 |
| 22 |
| 23 #define COMMON_DATA_NAME U_ICUDATA_NAME |
| 24 #define DATA_TYPE "dat" |
| 25 |
| 26 /* ICU package data file format (.dat files) ------------------------------- *** |
| 27 |
| 28 Description of the data format after the usual ICU data file header |
| 29 (UDataInfo etc.). |
| 30 |
| 31 Format version 1 |
| 32 |
| 33 A .dat package file contains a simple Table of Contents of item names, |
| 34 followed by the items themselves: |
| 35 |
| 36 1. ToC table |
| 37 |
| 38 uint32_t count; - number of items |
| 39 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item: |
| 40 uint32_t nameOffset; - offset of the item name |
| 41 uint32_t dataOffset; - offset of the item data |
| 42 both are byte offsets from the beginning of the data |
| 43 |
| 44 2. item name strings |
| 45 |
| 46 All item names are stored as char * strings in one block between the ToC table |
| 47 and the data items. |
| 48 |
| 49 3. data items |
| 50 |
| 51 The data items are stored following the item names block. |
| 52 Each data item is 16-aligned. |
| 53 The data items are stored in the sorted order of their names. |
| 54 |
| 55 Therefore, the top of the name strings block is the offset of the first item, |
| 56 the length of the last item is the difference between its offset and |
| 57 the .dat file length, and the length of every other item is the difference |
| 58 between its offset and the offset of the next item. |
| 59 |
| 60 ----------------------------------------------------------------------------- */ |
| 61 |
| 62 /* UDataInfo cf. udata.h */ |
| 63 static const UDataInfo dataInfo={ |
| 64 sizeof(UDataInfo), |
| 65 0, |
| 66 |
| 67 U_IS_BIG_ENDIAN, |
| 68 U_CHARSET_FAMILY, |
| 69 sizeof(UChar), |
| 70 0, |
| 71 |
| 72 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ |
| 73 {1, 0, 0, 0}, /* formatVersion */ |
| 74 {3, 0, 0, 0} /* dataVersion */ |
| 75 }; |
| 76 |
| 77 static uint32_t maxSize; |
| 78 |
| 79 static char stringStore[STRING_STORE_SIZE]; |
| 80 static uint32_t stringTop=0, basenameTotal=0; |
| 81 |
/* One listed item: where it is read from and where it lands in the package. */
typedef struct {
    char *pathname;             /* path used to open the item; in source-TOC mode the C entry point name instead */
    char *basename;             /* item name for the ToC: package name, tree separator, relative path */
    uint32_t basenameLength;    /* length of basename including its terminating NUL */
    uint32_t basenameOffset;    /* byte offset of the name inside the package data */
    uint32_t fileSize;          /* size of the item in bytes */
    uint32_t fileOffset;        /* byte offset of the item data inside the package data */
} File;
| 86 |
| 87 #define CHUNK_FILE_COUNT 256 |
| 88 static File *files = NULL; |
| 89 static uint32_t fileCount=0; |
| 90 static uint32_t fileMax = 0; |
| 91 |
| 92 |
| 93 static char *symPrefix = NULL; |
| 94 |
| 95 /* prototypes --------------------------------------------------------------- */ |
| 96 |
| 97 static void |
| 98 addFile(const char *filename, const char *name, const char *source, UBool source
TOC, UBool verbose); |
| 99 |
| 100 static char * |
| 101 allocString(uint32_t length); |
| 102 |
| 103 static int |
| 104 compareFiles(const void *file1, const void *file2); |
| 105 |
| 106 static char * |
| 107 pathToFullPath(const char *path, const char *source); |
| 108 |
| 109 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */ |
| 110 static void |
| 111 fixDirToTreePath(char *s); |
| 112 /* -------------------------------------------------------------------------- */ |
| 113 |
| 114 U_CAPI void U_EXPORT2 |
| 115 createCommonDataFile(const char *destDir, const char *name, const char *entrypoi
ntName, const char *type, const char *source, const char *copyRight, |
| 116 const char *dataFile, uint32_t max_size, UBool sourceTOC, U
Bool verbose, char *gencmnFileName) { |
| 117 static char buffer[4096]; |
| 118 char line[512]; |
| 119 char *s; |
| 120 UErrorCode errorCode=U_ZERO_ERROR; |
| 121 uint32_t i, fileOffset, basenameOffset, length, nread; |
| 122 FileStream *in, *file; |
| 123 |
| 124 maxSize = max_size; |
| 125 |
| 126 if (destDir == NULL) { |
| 127 destDir = u_getDataDirectory(); |
| 128 } |
| 129 if (name == NULL) { |
| 130 name = COMMON_DATA_NAME; |
| 131 } |
| 132 if (type == NULL) { |
| 133 type = DATA_TYPE; |
| 134 } |
| 135 if (source == NULL) { |
| 136 source = "."; |
| 137 } |
| 138 |
| 139 if (dataFile == NULL) { |
| 140 in = T_FileStream_stdin(); |
| 141 } else { |
| 142 in = T_FileStream_open(dataFile, "r"); |
| 143 if(in == NULL) { |
| 144 fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile); |
| 145 exit(U_FILE_ACCESS_ERROR); |
| 146 } |
| 147 } |
| 148 |
| 149 if (verbose) { |
| 150 if(sourceTOC) { |
| 151 printf("generating %s_%s.c (table of contents source file)\n", name,
type); |
| 152 } else { |
| 153 printf("generating %s.%s (common data file with table of contents)\n
", name, type); |
| 154 } |
| 155 } |
| 156 |
| 157 /* read the list of files and get their lengths */ |
| 158 while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) { |
| 159 /* remove trailing newline characters */ |
| 160 s=line; |
| 161 while(*s!=0) { |
| 162 if(*s=='\r' || *s=='\n') { |
| 163 *s=0; |
| 164 break; |
| 165 } |
| 166 ++s; |
| 167 } |
| 168 |
| 169 /* check for comment */ |
| 170 |
| 171 if (*line == '#') { |
| 172 continue; |
| 173 } |
| 174 |
| 175 /* add the file */ |
| 176 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) |
| 177 { |
| 178 char *t; |
| 179 while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) { |
| 180 *t = U_FILE_SEP_CHAR; |
| 181 } |
| 182 } |
| 183 #endif |
| 184 addFile(getLongPathname(line), name, source, sourceTOC, verbose); |
| 185 } |
| 186 |
| 187 if(in!=T_FileStream_stdin()) { |
| 188 T_FileStream_close(in); |
| 189 } |
| 190 |
| 191 if(fileCount==0) { |
| 192 fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<
stdin>" : dataFile); |
| 193 return; |
| 194 } |
| 195 |
| 196 /* sort the files by basename */ |
| 197 qsort(files, fileCount, sizeof(File), compareFiles); |
| 198 |
| 199 if(!sourceTOC) { |
| 200 UNewDataMemory *out; |
| 201 |
| 202 /* determine the offsets of all basenames and files in this common one *
/ |
| 203 basenameOffset=4+8*fileCount; |
| 204 fileOffset=(basenameOffset+(basenameTotal+15))&~0xf; |
| 205 for(i=0; i<fileCount; ++i) { |
| 206 files[i].fileOffset=fileOffset; |
| 207 fileOffset+=(files[i].fileSize+15)&~0xf; |
| 208 files[i].basenameOffset=basenameOffset; |
| 209 basenameOffset+=files[i].basenameLength; |
| 210 } |
| 211 |
| 212 /* create the output file */ |
| 213 out=udata_create(destDir, type, name, |
| 214 &dataInfo, |
| 215 copyRight == NULL ? U_COPYRIGHT_STRING : copyRight, |
| 216 &errorCode); |
| 217 if(U_FAILURE(errorCode)) { |
| 218 fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s
\n", |
| 219 destDir, name, type, |
| 220 u_errorName(errorCode)); |
| 221 exit(errorCode); |
| 222 } |
| 223 |
| 224 /* write the table of contents */ |
| 225 udata_write32(out, fileCount); |
| 226 for(i=0; i<fileCount; ++i) { |
| 227 udata_write32(out, files[i].basenameOffset); |
| 228 udata_write32(out, files[i].fileOffset); |
| 229 } |
| 230 |
| 231 /* write the basenames */ |
| 232 for(i=0; i<fileCount; ++i) { |
| 233 udata_writeString(out, files[i].basename, files[i].basenameLength); |
| 234 } |
| 235 length=4+8*fileCount+basenameTotal; |
| 236 |
| 237 /* copy the files */ |
| 238 for(i=0; i<fileCount; ++i) { |
| 239 /* pad to 16-align the next file */ |
| 240 length&=0xf; |
| 241 if(length!=0) { |
| 242 udata_writePadding(out, 16-length); |
| 243 } |
| 244 |
| 245 if (verbose) { |
| 246 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)file
s[i].fileSize, files[i].fileSize == 1 ? "" : "s"); |
| 247 } |
| 248 |
| 249 /* copy the next file */ |
| 250 file=T_FileStream_open(files[i].pathname, "rb"); |
| 251 if(file==NULL) { |
| 252 fprintf(stderr, "gencmn: unable to open listed file %s\n", files
[i].pathname); |
| 253 exit(U_FILE_ACCESS_ERROR); |
| 254 } |
| 255 for(nread = 0;;) { |
| 256 length=T_FileStream_read(file, buffer, sizeof(buffer)); |
| 257 if(length <= 0) { |
| 258 break; |
| 259 } |
| 260 nread += length; |
| 261 udata_writeBlock(out, buffer, length); |
| 262 } |
| 263 T_FileStream_close(file); |
| 264 length=files[i].fileSize; |
| 265 |
| 266 if (nread != files[i].fileSize) { |
| 267 fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld b
yte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fi
leSize == 1 ? "" : "s"); |
| 268 exit(U_FILE_ACCESS_ERROR); |
| 269 } |
| 270 } |
| 271 |
| 272 /* pad to 16-align the last file (cleaner, avoids growing .dat files in
icuswap) */ |
| 273 length&=0xf; |
| 274 if(length!=0) { |
| 275 udata_writePadding(out, 16-length); |
| 276 } |
| 277 |
| 278 /* finish */ |
| 279 udata_finish(out, &errorCode); |
| 280 if(U_FAILURE(errorCode)) { |
| 281 fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(
errorCode)); |
| 282 exit(errorCode); |
| 283 } |
| 284 } else { |
| 285 /* write a .c source file with the table of contents */ |
| 286 char *filename; |
| 287 FileStream *out; |
| 288 |
| 289 /* create the output filename */ |
| 290 filename=s=buffer; |
| 291 uprv_strcpy(filename, destDir); |
| 292 s=filename+uprv_strlen(filename); |
| 293 if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) { |
| 294 *s++=U_FILE_SEP_CHAR; |
| 295 } |
| 296 uprv_strcpy(s, name); |
| 297 if(*(type)!=0) { |
| 298 s+=uprv_strlen(s); |
| 299 *s++='_'; |
| 300 uprv_strcpy(s, type); |
| 301 } |
| 302 s+=uprv_strlen(s); |
| 303 uprv_strcpy(s, ".c"); |
| 304 |
| 305 /* open the output file */ |
| 306 out=T_FileStream_open(filename, "w"); |
| 307 if (gencmnFileName != NULL) { |
| 308 uprv_strcpy(gencmnFileName, filename); |
| 309 } |
| 310 if(out==NULL) { |
| 311 fprintf(stderr, "gencmn: unable to open .c output file %s\n", filena
me); |
| 312 exit(U_FILE_ACCESS_ERROR); |
| 313 } |
| 314 |
| 315 /* write the source file */ |
| 316 sprintf(buffer, |
| 317 "/*\n" |
| 318 " * ICU common data table of contents for %s.%s ,\n" |
| 319 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n" |
| 320 " */\n\n" |
| 321 "#include \"unicode/utypes.h\"\n" |
| 322 "#include \"unicode/udata.h\"\n" |
| 323 "\n" |
| 324 "/* external symbol declarations for data */\n", |
| 325 name, type); |
| 326 T_FileStream_writeLine(out, buffer); |
| 327 |
| 328 sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"",
files[0].pathname); |
| 329 T_FileStream_writeLine(out, buffer); |
| 330 for(i=1; i<fileCount; ++i) { |
| 331 sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pa
thname); |
| 332 T_FileStream_writeLine(out, buffer); |
| 333 } |
| 334 T_FileStream_writeLine(out, ";\n\n"); |
| 335 |
| 336 sprintf( |
| 337 buffer, |
| 338 "U_EXPORT struct {\n" |
| 339 " uint16_t headerSize;\n" |
| 340 " uint8_t magic1, magic2;\n" |
| 341 " UDataInfo info;\n" |
| 342 " char padding[%lu];\n" |
| 343 " uint32_t count, reserved;\n" |
| 344 " struct {\n" |
| 345 " const char *name;\n" |
| 346 " const void *data;\n" |
| 347 " } toc[%lu];\n" |
| 348 "} U_EXPORT2 %s_dat = {\n" |
| 349 " 32, 0xda, 0x27, {\n" |
| 350 " %lu, 0,\n" |
| 351 " %u, %u, %u, 0,\n" |
| 352 " {0x54, 0x6f, 0x43, 0x50},\n" |
| 353 " {1, 0, 0, 0},\n" |
| 354 " {0, 0, 0, 0}\n" |
| 355 " },\n" |
| 356 " \"\", %lu, 0, {\n", |
| 357 (unsigned long)32-4-sizeof(UDataInfo), |
| 358 (unsigned long)fileCount, |
| 359 entrypointName, |
| 360 (unsigned long)sizeof(UDataInfo), |
| 361 U_IS_BIG_ENDIAN, |
| 362 U_CHARSET_FAMILY, |
| 363 U_SIZEOF_UCHAR, |
| 364 (unsigned long)fileCount |
| 365 ); |
| 366 T_FileStream_writeLine(out, buffer); |
| 367 |
| 368 sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix
?symPrefix:"", files[0].pathname); |
| 369 T_FileStream_writeLine(out, buffer); |
| 370 for(i=1; i<fileCount; ++i) { |
| 371 sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, sy
mPrefix?symPrefix:"", files[i].pathname); |
| 372 T_FileStream_writeLine(out, buffer); |
| 373 } |
| 374 |
| 375 T_FileStream_writeLine(out, "\n }\n};\n"); |
| 376 T_FileStream_close(out); |
| 377 |
| 378 uprv_free(symPrefix); |
| 379 } |
| 380 } |
| 381 |
| 382 static void |
| 383 addFile(const char *filename, const char *name, const char *source, UBool source
TOC, UBool verbose) { |
| 384 char *s; |
| 385 uint32_t length; |
| 386 char *fullPath = NULL; |
| 387 |
| 388 if(fileCount==fileMax) { |
| 389 fileMax += CHUNK_FILE_COUNT; |
| 390 files = uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never free
d. */ |
| 391 if(files==NULL) { |
| 392 fprintf(stderr, "pkgdata/gencmn: Could not allocate %ld bytes for %d fil
es\n", (fileMax*sizeof(files[0])), fileCount); |
| 393 exit(U_MEMORY_ALLOCATION_ERROR); |
| 394 } |
| 395 } |
| 396 |
| 397 if(!sourceTOC) { |
| 398 FileStream *file; |
| 399 |
| 400 if(uprv_pathIsAbsolute(filename)) { |
| 401 fprintf(stderr, "gencmn: Error: absolute path encountered. Old style
paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur
.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename); |
| 402 exit(U_ILLEGAL_ARGUMENT_ERROR); |
| 403 } |
| 404 fullPath = pathToFullPath(filename, source); |
| 405 |
| 406 /* store the pathname */ |
| 407 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); |
| 408 s=allocString(length); |
| 409 uprv_strcpy(s, name); |
| 410 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); |
| 411 uprv_strcat(s, filename); |
| 412 |
| 413 /* get the basename */ |
| 414 fixDirToTreePath(s); |
| 415 files[fileCount].basename=s; |
| 416 files[fileCount].basenameLength=length; |
| 417 |
| 418 files[fileCount].pathname=fullPath; |
| 419 |
| 420 basenameTotal+=length; |
| 421 |
| 422 /* try to open the file */ |
| 423 file=T_FileStream_open(fullPath, "rb"); |
| 424 if(file==NULL) { |
| 425 fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath)
; |
| 426 exit(U_FILE_ACCESS_ERROR); |
| 427 } |
| 428 |
| 429 /* get the file length */ |
| 430 length=T_FileStream_size(file); |
| 431 if(T_FileStream_error(file) || length<=20) { |
| 432 fprintf(stderr, "gencmn: unable to get length of listed file %s\n",
fullPath); |
| 433 exit(U_FILE_ACCESS_ERROR); |
| 434 } |
| 435 |
| 436 T_FileStream_close(file); |
| 437 |
| 438 /* do not add files that are longer than maxSize */ |
| 439 if(maxSize && length>maxSize) { |
| 440 if (verbose) { |
| 441 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length,
(long)maxSize); |
| 442 } |
| 443 return; |
| 444 } |
| 445 files[fileCount].fileSize=length; |
| 446 } else { |
| 447 char *t; |
| 448 |
| 449 /* get and store the basename */ |
| 450 /* need to include the package name */ |
| 451 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); |
| 452 s=allocString(length); |
| 453 uprv_strcpy(s, name); |
| 454 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); |
| 455 uprv_strcat(s, filename); |
| 456 fixDirToTreePath(s); |
| 457 files[fileCount].basename=s; |
| 458 |
| 459 |
| 460 /* turn the basename into an entry point name and store in the pathname
field */ |
| 461 t=files[fileCount].pathname=allocString(length); |
| 462 while(--length>0) { |
| 463 if(*s=='.' || *s=='-' || *s=='/') { |
| 464 *t='_'; |
| 465 } else { |
| 466 *t=*s; |
| 467 } |
| 468 ++s; |
| 469 ++t; |
| 470 } |
| 471 *t=0; |
| 472 } |
| 473 ++fileCount; |
| 474 } |
| 475 |
| 476 static char * |
| 477 allocString(uint32_t length) { |
| 478 uint32_t top=stringTop+length; |
| 479 char *p; |
| 480 |
| 481 if(top>STRING_STORE_SIZE) { |
| 482 fprintf(stderr, "gencmn: out of memory\n"); |
| 483 exit(U_MEMORY_ALLOCATION_ERROR); |
| 484 } |
| 485 p=stringStore+stringTop; |
| 486 stringTop=top; |
| 487 return p; |
| 488 } |
| 489 |
| 490 static char * |
| 491 pathToFullPath(const char *path, const char *source) { |
| 492 int32_t length; |
| 493 int32_t newLength; |
| 494 char *fullPath; |
| 495 int32_t n; |
| 496 |
| 497 length = (uint32_t)(uprv_strlen(path) + 1); |
| 498 newLength = (length + 1 + (int32_t)uprv_strlen(source)); |
| 499 fullPath = uprv_malloc(newLength); |
| 500 if(source != NULL) { |
| 501 uprv_strcpy(fullPath, source); |
| 502 uprv_strcat(fullPath, U_FILE_SEP_STRING); |
| 503 } else { |
| 504 fullPath[0] = 0; |
| 505 } |
| 506 n = (int32_t)uprv_strlen(fullPath); |
| 507 uprv_strcat(fullPath, path); |
| 508 |
| 509 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) |
| 510 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) |
| 511 /* replace tree separator (such as '/') with file sep char (such as ':' or '
\\') */ |
| 512 for(;fullPath[n];n++) { |
| 513 if(fullPath[n] == U_FILE_ALT_SEP_CHAR) { |
| 514 fullPath[n] = U_FILE_SEP_CHAR; |
| 515 } |
| 516 } |
| 517 #endif |
| 518 #endif |
| 519 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) |
| 520 /* replace tree separator (such as '/') with file sep char (such as ':' or '
\\') */ |
| 521 for(;fullPath[n];n++) { |
| 522 if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) { |
| 523 fullPath[n] = U_FILE_SEP_CHAR; |
| 524 } |
| 525 } |
| 526 #endif |
| 527 return fullPath; |
| 528 } |
| 529 |
| 530 static int |
| 531 compareFiles(const void *file1, const void *file2) { |
| 532 /* sort by basename */ |
| 533 return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename); |
| 534 } |
| 535 |
/*
 * Rewrites s in place so that every file separator, and every alternate file
 * separator, becomes the tree entry separator. Which replacements are
 * compiled in depends on how the three separator characters compare on the
 * build platform; when all of them coincide the function does nothing.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *cursor;

    for(cursor=s; *cursor!=0; ++cursor) {
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
        if(*cursor==U_FILE_SEP_CHAR) {
            *cursor = U_TREE_ENTRY_SEP_CHAR;
            continue;
        }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
        if(*cursor==U_FILE_ALT_SEP_CHAR) {
            *cursor = U_TREE_ENTRY_SEP_CHAR;
        }
#endif
    }
#else
    (void)s;
#endif
}
OLD | NEW |